├── .gitignore
├── LICENSE
├── README.md
├── assets
│   ├── p.png
│   ├── test.png
│   └── test_disp.png
├── config
│   ├── __init__.py
│   ├── cfg_cityscape.py
│   ├── cfg_eth3d_autoencoder.py
│   ├── cfg_eth3d_fm.py
│   ├── cfg_euroc_autoencoder.py
│   ├── cfg_euroc_fm.py
│   ├── cfg_folder.py
│   ├── cfg_kitti_autoencoder.py
│   ├── cfg_kitti_fm.py
│   ├── cfg_kitti_fm_joint.py
│   ├── cfg_kitti_fm_refine.py
│   ├── cfg_make3d_fm.py
│   └── cfg_odom_fm.py
├── mono
│   ├── __init__.py
│   ├── apis
│   │   ├── __init__.py
│   │   ├── env.py
│   │   └── trainer.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── evaluation
│   │   │   ├── __init__.py
│   │   │   ├── eval_hooks.py
│   │   │   └── pixel_error.py
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── dist_utils.py
│   │       └── misc.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── cityscape_dataset.py
│   │   ├── eth3d_dataset.py
│   │   ├── euroc_dataset.py
│   │   ├── folder_dataset.py
│   │   ├── get_dataset.py
│   │   ├── gt_pose
│   │   │   ├── 00.txt
│   │   │   ├── 01.txt
│   │   │   ├── 02.txt
│   │   │   ├── 03.txt
│   │   │   ├── 04.txt
│   │   │   ├── 05.txt
│   │   │   ├── 06.txt
│   │   │   ├── 07.txt
│   │   │   ├── 08.txt
│   │   │   ├── 09.txt
│   │   │   ├── 10.txt
│   │   │   └── 12.txt
│   │   ├── kitti_dataset.py
│   │   ├── kitti_utils.py
│   │   ├── loader
│   │   │   ├── __init__.py
│   │   │   ├── build_loader.py
│   │   │   └── sampler.py
│   │   ├── mono_dataset.py
│   │   ├── splits
│   │   │   ├── __init__.py
│   │   │   ├── benchmark
│   │   │   │   ├── eigen_to_benchmark_ids.npy
│   │   │   │   ├── test_files.txt
│   │   │   │   ├── train_files.txt
│   │   │   │   └── val_files.txt
│   │   │   ├── cityscape
│   │   │   │   ├── gen_cityscape_split.py
│   │   │   │   ├── test.txt
│   │   │   │   ├── train.txt
│   │   │   │   ├── train_files.txt
│   │   │   │   ├── val.txt
│   │   │   │   └── val_files.txt
│   │   │   ├── eigen_benchmark
│   │   │   │   └── test_files.txt
│   │   │   ├── eigen_full
│   │   │   │   ├── train_files.txt
│   │   │   │   └── val_files.txt
│   │   │   ├── exp
│   │   │   │   ├── __init__.py
│   │   │   │   ├── train_files.txt
│   │   │   │   └── val_files.txt
│   │   │   ├── kitti_archives_to_download.txt
│   │   │   ├── kitti_shot_sequence
│   │   │   │   ├── gen_split.py
│   │   │   │   └── val_files.txt
│   │   │   ├── odom
│   │   │   │   ├── test_files_09.txt
│   │   │   │   ├── test_files_10.txt
│   │   │   │   ├── train_files.txt
│   │   │   │   └── val_files.txt
│   │   │   ├── short
│   │   │   │   ├── __init__.py
│   │   │   │   ├── train_files.txt
│   │   │   │   └── val_files.txt
│   │   │   └── test
│   │   │       ├── train_files.txt
│   │   │       └── val_files.txt
│   │   └── utils.py
│   ├── model
│   │   ├── __init__.py
│   │   ├── mono_autoencoder
│   │   │   ├── __init__.py
│   │   │   ├── decoder.py
│   │   │   ├── encoder.py
│   │   │   ├── layers.py
│   │   │   ├── net.py
│   │   │   └── resnet.py
│   │   ├── mono_baseline
│   │   │   ├── __init__.py
│   │   │   ├── depth_decoder.py
│   │   │   ├── depth_encoder.py
│   │   │   ├── layers.py
│   │   │   ├── net.py
│   │   │   ├── pose_decoder.py
│   │   │   ├── pose_encoder.py
│   │   │   └── resnet.py
│   │   ├── mono_fm
│   │   │   ├── __init__.py
│   │   │   ├── depth_decoder.py
│   │   │   ├── depth_encoder.py
│   │   │   ├── layers.py
│   │   │   ├── net.py
│   │   │   ├── pose_decoder.py
│   │   │   ├── pose_encoder.py
│   │   │   └── resnet.py
│   │   ├── mono_fm_joint
│   │   │   ├── __init__.py
│   │   │   ├── decoder.py
│   │   │   ├── depth_decoder.py
│   │   │   ├── depth_encoder.py
│   │   │   ├── encoder.py
│   │   │   ├── layers.py
│   │   │   ├── net.py
│   │   │   ├── pose_decoder.py
│   │   │   ├── pose_encoder.py
│   │   │   └── resnet.py
│   │   └── registry.py
│   └── tools
│       ├── __init__.py
│       ├── file_interface.py
│       ├── geometry.py
│       ├── kitti_evaluation_toolkit.py
│       ├── lie_algebra.py
│       ├── pose_evaluation_utils.py
│       ├── trajectory.py
│       └── transformations.py
├── requirements.txt
├── run.py
├── scripts
│   ├── __init__.py
│   ├── draw_odometry.py
│   ├── eval_depth.py
│   ├── eval_depth_pp.py
│   ├── eval_pose.py
│   ├── infer.py
│   └── infer_singleimage.py
└── train.py
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | *.pyc
3 | .idea
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 sconly
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # feature_metric_depth
2 | This is the official code for the method described in
3 | > **Feature-metric Loss for Self-supervised Learning of Depth and Egomotion**
4 | >
5 | > [ECCV 2020](https://arxiv.org/pdf/2007.10603.pdf)
6 |
7 |
8 |
9 |
10 |
11 | If you find our work useful in your research, please consider citing our paper:
12 |
13 | ```
14 | @inproceedings{shu2020featdepth,
15 | title={Feature-metric Loss for Self-supervised Learning of Depth and Egomotion},
16 | author={Shu, Chang and Yu, Kun and Duan, Zhixiang and Yang, Kuiyuan},
17 | booktitle={ECCV},
18 | year={2020}
19 | }
20 | ```
21 |
22 | ## Setup
23 |
24 | ### Requirements:
25 | - PyTorch 1.1+, Python 3.5+, CUDA 10.0+
26 | - mmcv==0.4.4
27 |
28 | Our code is based on mmcv for distributed training.
29 | To make it convenient to train and test our code, we provide our [anaconda environment](https://drive.google.com/file/d/1NSoGxhP8UpyW-whzpqP3WIB6u2mgGP49/view?usp=sharing):
30 | simply download it, extract it into the folder of your anaconda environments, and use the Python interpreter in it to run our code.
31 |
32 | If you would like to set up the anaconda environment yourself, proceed as follows:
33 | ```bash
34 | # first, make sure that your conda is setup properly with the right environment
35 | # for that, check that `which conda`, `which pip` and `which python` point to the
36 | # right path. From a clean conda env, this is what you need to do
37 |
38 | conda create --name featdepth python=3.7
39 | conda activate featdepth
40 |
41 | # this installs the right pip and dependencies for the fresh python
42 | conda install ipython
43 | conda install pip
44 |
45 | # install required packages from requirements.txt
46 | pip install -r requirements.txt
47 | ```
48 |
49 | ## KITTI training data
50 |
51 | Our training data is the same as that of other self-supervised monocular depth estimation methods; please refer to [monodepth2](https://github.com/nianticlabs/monodepth2) to prepare the training data.
52 |
53 | ## Pretrained weights
54 |
55 | We provide weights for:
56 | (1) [AutoEncoder trained on the KITTI raw data](https://drive.google.com/file/d/1ncAWUMvLq2ETMpG-7eI9qfILce_cPPfy/view?usp=sharing);
57 | (2) [FeatDepth trained on the KITTI raw data](https://drive.google.com/file/d/1HlAubfuja5nBKpfNU3fQs-3m3Zaiu9RI/view?usp=sharing);
58 | (3) [FeatDepth finetuned on the test split of KITTI raw data by using online refinement](https://drive.google.com/file/d/1CfCtz55s4QHya3y3UslxsuD_0cxNlA-D/view?usp=sharing);
59 | (4) [FeatDepth trained on KITTI odometry](https://drive.google.com/file/d/1vQJbiyPXv_XNQYpyVocDB3-LKwx2LVka/view?usp=sharing);
60 | (5) [FeatDepth trained on EuRoC](https://drive.google.com/file/d/1IMIAKpHXmqyUxiUIiqqp5qI-nJXDUSmj/view?usp=sharing);
61 | (6) [FeatDepth trained on NYU](https://drive.google.com/file/d/1Mo050P-DgG-jrNXWww07GXXyst5h5Q74/view?usp=sharing).
62 |
63 | ## API
64 | We provide an API for predicting depth and pose from an image sequence and visualizing the results.
65 | The corresponding scripts are stored in the 'scripts' folder.
66 | ```
67 | draw_odometry.py is used to provide several analytical curves and obtain standard KITTI odometry evaluation results.
68 | ```
69 |
70 | ```
71 | eval_pose.py is used to obtain KITTI odometry evaluation results.
72 | ```
73 |
74 | ```
75 | eval_depth.py is used to obtain KITTI depth evaluation results.
76 | ```
77 |
78 | ```
79 | infer.py is used to generate depth maps from given models.
80 | ```
81 |
82 | ```
83 | infer_singleimage.py is used to test a single image for visualization.
84 | ```
85 | ## Training
86 | You can use the following command to launch distributed training of our model:
87 | ```shell
88 | /path/to/python -m torch.distributed.launch --master_port=9900 --nproc_per_node=1 train.py --config /path/to/cfg_kitti_fm.py --work_dir /dir/for/saving/weights/and/logs
89 | ```
90 | Here nproc_per_node refers to the number of GPUs you want to use.
91 |
92 | ## Configurations
93 | We provide a variety of config files for training on different datasets.
94 | They are stored in the config folder.
95 |
96 | For example:
97 | (1) 'cfg_kitti_fm.py' is used to train our model on the KITTI dataset, where the autoencoder weights are loaded from the pretrained weights we provide and kept fixed during training.
98 | This mode is preferred when your GPU memory is less than 16 GB;
99 | (2) 'cfg_kitti_fm_joint.py' is used to train our model on the KITTI dataset, where the autoencoder is jointly trained with the depth net and pose net.
100 | We rescale the input resolution of our model so that training fits into 12 GB of GPU memory, which slightly reduces performance.
101 | You can modify the input resolution according to your computational resources.
102 |
103 | For modifying config files, please refer to cfg_kitti_fm.py; the fields you will most often change are sketched below.
104 |
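As a quick reference, here is a minimal sketch of the fields in cfg_kitti_fm.py that are typically adapted to your own machine; the paths below are placeholders, not the ones shipped with the repo.

```python
# Sketch of the commonly edited fields in config/cfg_kitti_fm.py (placeholder paths).
HEIGHT = 320        # input image height; lower it (e.g. 192) to save GPU memory
WIDTH = 1024        # input image width; lower it (e.g. 640) to save GPU memory
IMGS_PER_GPU = 2    # number of images fed to each GPU

data = dict(
    name = 'kitti',                            # dataset name
    split = 'exp',                             # training split name
    height = HEIGHT,
    width = WIDTH,
    in_path = '/path/to/kitti_raw',            # path to raw KITTI data
    gt_depth_path = '/path/to/gt_depths.npz',  # path to ground-truth depths used for validation
    png = False,                               # set True if your images are stored as .png
)
```
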
105 | ## Online refinement
106 | We provide a config file for online refinement: you can use cfg_kitti_fm_refine.py to refine a model trained on KITTI raw data by continuing training on the test data.
107 | For the online refinement settings, please refer to cfg_kitti_fm_refine.py in the config folder; the key settings are sketched below.
108 |
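A minimal sketch of what cfg_kitti_fm_refine.py changes compared with the normal training config is shown below (the checkpoint path is a placeholder): the training split is switched to the test split, and training resumes from your already-trained weights.

```python
# Sketch of the key online-refinement settings in config/cfg_kitti_fm_refine.py (placeholder path).
data = dict(
    name = 'kitti',
    split = 'test',  # keep training, but on the list of test files
)

resume_from = '/path/to/epoch_40.pth'  # weights previously trained on the KITTI raw training split
total_epochs = 60                      # must exceed the number of epochs already trained (here 40)
```
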
109 | ## Finetuning
110 | If you want to finetune from given weights, change the 'finetune' term in the config files from 'None' to the path of an existing pre-trained weight.
111 |
112 | ## Resuming
113 | If you want to reproduce the training state of a certain pretrained weight, change the 'resume_from' term in the config files from 'None' to the path of an existing pre-trained weight.
114 | The program will continue training from where that pretrained weight left off.
115 | Note that you have to increase the 'total_epochs' value to make sure that the training has enough epochs left to continue; a brief example of both options is sketched below.
116 |
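As a brief illustration (the checkpoint path is a placeholder): 'finetune' only loads the given weights, while 'resume_from' continues the run from the epoch at which that weight stopped.

```python
# In any config under config/: finetune from existing weights ...
finetune = '/path/to/pretrained_weights.pth'  # placeholder path
resume_from = None

# ... or resume a previous run from where it stopped:
# finetune = None
# resume_from = '/path/to/pretrained_weights.pth'
# total_epochs = 60  # leave enough epochs beyond the resumed checkpoint
```
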
117 | ## Notes
118 | Our model predicts inverse depths.
119 | If you want to get real depth when training a stereo model, you have to convert the inverse depth to depth and then multiply it by 36, as sketched below.
120 |
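For reference, here is a minimal sketch of that conversion, mirroring the disp_to_depth helper in mono/core/evaluation/pixel_error.py; the disparity array is a stand-in for a network prediction, and 36 is the stereo scaling factor quoted above.

```python
import numpy as np

# Stand-in for a network prediction: sigmoid disparity in [0, 1].
disp = np.random.rand(320, 1024).astype(np.float32)

# Same mapping as disp_to_depth() in mono/core/evaluation/pixel_error.py:
# rescale into [1/max_depth, 1/min_depth], then invert to obtain depth.
min_depth, max_depth = 0.1, 100.0
scaled_disp = 1.0 / max_depth + (1.0 / min_depth - 1.0 / max_depth) * disp
depth = 1.0 / scaled_disp

# For a stereo-trained model, multiply by 36 to obtain real (metric) depth.
metric_depth = depth * 36.0
```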
--------------------------------------------------------------------------------
/assets/p.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/assets/p.png
--------------------------------------------------------------------------------
/assets/test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/assets/test.png
--------------------------------------------------------------------------------
/assets/test_disp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/assets/test_disp.png
--------------------------------------------------------------------------------
/config/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/config/__init__.py
--------------------------------------------------------------------------------
/config/cfg_cityscape.py:
--------------------------------------------------------------------------------
1 | split = 'cityscape'
2 | dataset = 'cityscape'
3 |
4 | height = 384
5 | width = 768
6 | disparity_smoothness = 1e-3
7 | scales = [0, 1, 2, 3, 4]
8 | min_depth = 0.1
9 | max_depth = 100.0
10 | frame_ids = [0, -1, 1]
11 | learning_rate = 1e-4
12 |
13 | depth_num_layers = 50
14 | pose_num_layers = 50
15 | total_epochs = 45
16 | device_ids = range(8)
17 |
18 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(depth_num_layers)
19 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(pose_num_layers)
20 |
21 | in_path = '/ssd/Cityscapes'
22 | gt_depth_path = '/node01_data5/monodepth2-test/monodepth2/gt_depths.npz'
23 | checkpoint_path = '/node01_data5/monodepth2-test/model/refine/smallfigure.pth'
24 |
25 | imgs_per_gpu = 2
26 | workers_per_gpu = 2
27 |
28 | validate = False
29 |
30 | png = True
31 | scale_invariant = False
32 | plane_fitting = False
33 | finetune = False
34 | perception = False
35 | focus_loss = False
36 |
37 | scale_invariant_weight = 0.01
38 | plane_fitting_weight = 0.0001
39 | perceptional_weight = 0.001
40 |
41 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0)
42 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
43 | # learning policy
44 | lr_config = dict(
45 | policy='step',
46 | warmup='linear',
47 | warmup_iters=500,
48 | warmup_ratio=1.0 / 3,
49 | step=[15,25,35],
50 | gamma=0.5,
51 | )
52 |
53 | checkpoint_config = dict(interval=1)
54 | # yapf:disable
55 | log_config = dict(interval=50,
56 | hooks=[dict(type='TextLoggerHook'),])
57 | # yapf:enable
58 | # runtime settings
59 | dist_params = dict(backend='nccl')
60 | log_level = 'INFO'
61 | load_from = None
62 | resume_from = None
63 | workflow = [('train', 1)]
--------------------------------------------------------------------------------
/config/cfg_eth3d_autoencoder.py:
--------------------------------------------------------------------------------
1 | DEPTH_LAYERS = 50
2 | POSE_LAYERS = 18
3 | FRAME_IDS = [0]
4 | IMGS_PER_GPU = 3
5 | HEIGHT = 448
6 | WIDTH = 736
7 |
8 | data = dict(
9 | name = 'eth3d',
10 | split = 'exp',
11 | height = HEIGHT,
12 | width = WIDTH,
13 | frame_ids = FRAME_IDS,
14 | in_path = '/ssd/ETH3D/slam/cables_4',
15 | gt_depth_path = None,
16 | png = True,
17 | stereo_scale = False,
18 | )
19 |
20 | model = dict(
21 | name = 'autoencoder',
22 | depth_num_layers = DEPTH_LAYERS,
23 | pose_num_layers = POSE_LAYERS,
24 | frame_ids = FRAME_IDS,
25 | imgs_per_gpu = IMGS_PER_GPU,
26 | height = HEIGHT,
27 | width = WIDTH,
28 | scales = [0, 1, 2, 3],
29 | min_depth = 0.1,
30 | max_depth = 100.0,
31 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS),
32 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS),
33 | automask = True,
34 | disp_norm = True,
35 | use_min_construct = True,
36 | dis=0.001,
37 | cvt=0.001,
38 | )
39 |
40 |
41 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth'
42 | resume_from = None
43 | finetune = None
44 | total_epochs = 30
45 | imgs_per_gpu = IMGS_PER_GPU
46 | learning_rate = 1e-4
47 | workers_per_gpu = 4
48 | validate = False
49 |
50 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0)
51 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
52 | lr_config = dict(
53 | policy='step',
54 | warmup='linear',
55 | warmup_iters=500,
56 | warmup_ratio=1.0 / 3,
57 | step=[10,20],
58 | gamma=0.5,
59 | )
60 |
61 | checkpoint_config = dict(interval=1)
62 | log_config = dict(interval=50,
63 | hooks=[dict(type='TextLoggerHook'),])
64 | dist_params = dict(backend='nccl')
65 | log_level = 'INFO'
66 | load_from = None
67 | workflow = [('train', 1)]
--------------------------------------------------------------------------------
/config/cfg_eth3d_fm.py:
--------------------------------------------------------------------------------
1 | DEPTH_LAYERS = 50
2 | POSE_LAYERS = 18
3 | FRAME_IDS = [0, -1, 1, 's']
4 | IMGS_PER_GPU = 2
5 | HEIGHT = 448
6 | WIDTH = 736
7 |
8 | data = dict(
9 | name = 'eth3d',
10 | split = 'exp',
11 | height = HEIGHT,
12 | width = WIDTH,
13 | frame_ids = FRAME_IDS,
14 | in_path = '/ssd/ETH3D/slam/cables_4',
15 | gt_depth_path = None,
16 | png = True,
17 | stereo_scale = True if 's' in FRAME_IDS else False,
18 | )
19 |
20 | model = dict(
21 | name = 'mono_fm',
22 | depth_num_layers = DEPTH_LAYERS,
23 | pose_num_layers = POSE_LAYERS,
24 | frame_ids = FRAME_IDS,
25 | imgs_per_gpu = IMGS_PER_GPU,
26 | height = HEIGHT,
27 | width = WIDTH,
28 | scales = [0, 1, 2, 3],
29 | min_depth = 0.1,
30 | max_depth = 100.0,
31 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS),
32 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS),
33 | extractor_pretrained_path = '/node01/jobs/io/out/changshu/autoencoder_eth_1/epoch_30.pth',
34 | automask = False if 's' in FRAME_IDS else True,
35 | disp_norm = False if 's' in FRAME_IDS else True,
36 | perception_weight = 0,
37 | smoothness_weight = 1e-3,
38 | )
39 |
40 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth'
41 | resume_from = None
42 | finetune = None
43 | total_epochs = 40
44 | imgs_per_gpu = IMGS_PER_GPU
45 | learning_rate = 1e-4
46 | workers_per_gpu = 4
47 | validate = False
48 |
49 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0)
50 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
51 | lr_config = dict(
52 | policy='step',
53 | warmup='linear',
54 | warmup_iters=500,
55 | warmup_ratio=1.0 / 3,
56 | step=[20,30],
57 | gamma=0.5,
58 | )
59 |
60 | checkpoint_config = dict(interval=1)
61 | log_config = dict(interval=5,
62 | hooks=[dict(type='TextLoggerHook'),])
63 | dist_params = dict(backend='nccl')
64 | log_level = 'INFO'
65 | load_from = None
66 | workflow = [('train', 1)]
--------------------------------------------------------------------------------
/config/cfg_euroc_autoencoder.py:
--------------------------------------------------------------------------------
1 | DEPTH_LAYERS = 50
2 | POSE_LAYERS = 18
3 | FRAME_IDS = [0]
4 | IMGS_PER_GPU = 3
5 | HEIGHT = 480
6 | WIDTH = 768
7 |
8 | data = dict(
9 | name = 'euroc',
10 | split = 'exp',
11 | height = HEIGHT,
12 | width = WIDTH,
13 | frame_ids = FRAME_IDS,
14 | in_path = '/ssd/EuRoc/MH_04_difficult',
15 | gt_depth_path = None,
16 | png = True,
17 | stereo_scale = False,
18 | )
19 |
20 | model = dict(
21 | name = 'autoencoder',
22 | depth_num_layers = DEPTH_LAYERS,
23 | pose_num_layers = POSE_LAYERS,
24 | frame_ids = FRAME_IDS,
25 | imgs_per_gpu = IMGS_PER_GPU,
26 | height = HEIGHT,
27 | width = WIDTH,
28 | scales = [0, 1, 2, 3],
29 | min_depth = 0.1,
30 | max_depth = 100.0,
31 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS),
32 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS),
33 | automask = True,
34 | disp_norm = True,
35 | use_min_construct = True,
36 | dis=0.001,
37 | cvt=0.001,
38 | )
39 |
40 |
41 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth'
42 | resume_from = None
43 | finetune = None
44 | total_epochs = 30
45 | imgs_per_gpu = IMGS_PER_GPU
46 | learning_rate = 1e-4
47 | workers_per_gpu = 4
48 | validate = False
49 |
50 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0)
51 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
52 | lr_config = dict(
53 | policy='step',
54 | warmup='linear',
55 | warmup_iters=500,
56 | warmup_ratio=1.0 / 3,
57 | step=[10,20],
58 | gamma=0.5,
59 | )
60 |
61 | checkpoint_config = dict(interval=1)
62 | log_config = dict(interval=50,
63 | hooks=[dict(type='TextLoggerHook'),])
64 | dist_params = dict(backend='nccl')
65 | log_level = 'INFO'
66 | load_from = None
67 | workflow = [('train', 1)]
--------------------------------------------------------------------------------
/config/cfg_euroc_fm.py:
--------------------------------------------------------------------------------
1 | DEPTH_LAYERS = 50
2 | POSE_LAYERS = 18
3 | FRAME_IDS = [0, -1, 1, 's']
4 | IMGS_PER_GPU = 2
5 | HEIGHT = 480
6 | WIDTH = 768
7 |
8 | data = dict(
9 | name = 'euroc',
10 | split = 'exp',
11 | height = HEIGHT,
12 | width = WIDTH,
13 | frame_ids = FRAME_IDS,
14 | in_path = '/ssd/EuRoc/MH_02_easy',#'/ssd/EuRoc/MH_02_easy','/ssd/EuRoc/MH_04_difficult'
15 | gt_depth_path = None,
16 | png = True,
17 | stereo_scale = True if 's' in FRAME_IDS else False,
18 | )
19 |
20 | model = dict(
21 | name = 'mono_fm',
22 | depth_num_layers = DEPTH_LAYERS,
23 | pose_num_layers = POSE_LAYERS,
24 | frame_ids = FRAME_IDS,
25 | imgs_per_gpu = IMGS_PER_GPU,
26 | height = HEIGHT,
27 | width = WIDTH,
28 | scales = [0, 1, 2, 3],
29 | min_depth = 0.1,
30 | max_depth = 50.0,
31 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS),
32 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS),
33 | extractor_pretrained_path = '/node01/jobs/io/out/changshu/autoencoder_euroc/epoch_30.pth',
34 | automask = False,
35 | disp_norm = False,
36 | perception_weight = 1e-3,
37 | smoothness_weight = 1e-3,
38 | )
39 |
40 | # resume_from = '/node01/jobs/io/out/changshu/fm_euroc/epoch_40.pth'
41 | resume_from = None
42 | finetune = None
43 | total_epochs = 80
44 | imgs_per_gpu = IMGS_PER_GPU
45 | learning_rate = 1e-4
46 | workers_per_gpu = 4
47 | validate = False
48 |
49 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0)
50 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
51 | lr_config = dict(
52 | policy='step',
53 | warmup='linear',
54 | warmup_iters=500,
55 | warmup_ratio=1.0 / 3,
56 | step=[20,30],
57 | gamma=0.5,
58 | )
59 |
60 | checkpoint_config = dict(interval=1)
61 | log_config = dict(interval=5,
62 | hooks=[dict(type='TextLoggerHook'),])
63 | dist_params = dict(backend='nccl')
64 | log_level = 'INFO'
65 | load_from = None
66 | workflow = [('train', 1)]
--------------------------------------------------------------------------------
/config/cfg_folder.py:
--------------------------------------------------------------------------------
1 | split = 'exp'
2 | dataset = 'folder'
3 | height = 320
4 | width = 640
5 | disparity_smoothness = 1e-3
6 | scales = [0, 1, 2, 3, 4]
7 | min_depth = 0.1
8 | max_depth = 100.0
9 | frame_ids = [0, -1, 1]
10 | learning_rate = 1e-4
11 | depth_num_layers = 50
12 | pose_num_layers = 50
13 | total_epochs = 45
14 | device_ids = range(8)
15 |
16 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(depth_num_layers)
17 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(pose_num_layers)
18 |
19 | in_path = '/ssd/avp/soho_garage3/keyframe_underground'
20 | gt_depth_path = ''
21 | checkpoint_path = '/node01_data5/monodepth2-test/model/refine/smallfigure.pth'
22 |
23 | imgs_per_gpu = 2
24 | workers_per_gpu = 4
25 |
26 | validate = False
27 |
28 | png = False
29 | scale_invariant = False
30 | plane_fitting = False
31 | finetune = False
32 | perception = False
33 | focus_loss = False
34 |
35 | scale_invariant_weight = 0.01
36 | plane_fitting_weight = 0.0001
37 | perceptional_weight = 0.001
38 |
39 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0)
40 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
41 | # learning policy
42 | lr_config = dict(
43 | policy='step',
44 | warmup='linear',
45 | warmup_iters=500,
46 | warmup_ratio=1.0 / 3,
47 | step=[15,25,35],
48 | gamma=0.5,
49 | )
50 |
51 | checkpoint_config = dict(interval=1)
52 | # yapf:disable
53 | log_config = dict(interval=50,
54 | hooks=[dict(type='TextLoggerHook'),])
55 | # yapf:enable
56 | # runtime settings
57 | dist_params = dict(backend='nccl')
58 | log_level = 'INFO'
59 | load_from = None
60 | resume_from = None
61 | workflow = [('train', 1)]
--------------------------------------------------------------------------------
/config/cfg_kitti_autoencoder.py:
--------------------------------------------------------------------------------
1 | DEPTH_LAYERS = 50
2 | POSE_LAYERS = 18
3 | FRAME_IDS = [0]
4 | IMGS_PER_GPU = 5
5 | HEIGHT = 256
6 | WIDTH = 800
7 |
8 | data = dict(
9 | name = 'kitti',
10 | split = 'exp',
11 | height = HEIGHT,
12 | width = WIDTH,
13 | frame_ids = FRAME_IDS,
14 | in_path = '/node01_data5/kitti_raw',
15 | gt_depth_path = '/node01_data5/monodepth2-test/monodepth2/gt_depths.npz',
16 | png = False,
17 | stereo_scale = False,
18 | )
19 |
20 | model = dict(
21 | name = 'autoencoder',
22 | depth_num_layers = DEPTH_LAYERS,
23 | pose_num_layers = POSE_LAYERS,
24 | frame_ids = FRAME_IDS,
25 | imgs_per_gpu = IMGS_PER_GPU,
26 | height = HEIGHT,
27 | width = WIDTH,
28 | scales = [0, 1, 2, 3],
29 | min_depth = 0.1,
30 | max_depth = 100.0,
31 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS),
32 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS),
33 | automask = True,
34 | disp_norm = True,
35 | use_min_construct = True,
36 | dis=0.001,
37 | cvt=0.001,
38 | )
39 |
40 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth'
41 | resume_from = None
42 | finetune = None
43 | total_epochs = 30
44 | imgs_per_gpu = IMGS_PER_GPU
45 | learning_rate = 1e-4
46 | workers_per_gpu = 4
47 | validate = False
48 |
49 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0)
50 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
51 | lr_config = dict(
52 | policy='step',
53 | warmup='linear',
54 | warmup_iters=500,
55 | warmup_ratio=1.0 / 3,
56 | step=[10,20],
57 | gamma=0.5,
58 | )
59 |
60 | checkpoint_config = dict(interval=1)
61 | log_config = dict(interval=50,
62 | hooks=[dict(type='TextLoggerHook'),])
63 | dist_params = dict(backend='nccl')
64 | log_level = 'INFO'
65 | load_from = None
66 | workflow = [('train', 1)]
--------------------------------------------------------------------------------
/config/cfg_kitti_fm.py:
--------------------------------------------------------------------------------
1 | DEPTH_LAYERS = 50#resnet50
2 | POSE_LAYERS = 18#resnet18
3 | FRAME_IDS = [0, -1, 1, 's']# 0 refers to the current frame, -1 and 1 refer to temporally adjacent frames, 's' refers to the adjacent stereo frame.
4 | IMGS_PER_GPU = 2 #the number of images fed to each GPU
5 | HEIGHT = 320#input image height
6 | WIDTH = 1024#input image width
7 |
8 | data = dict(
9 | name = 'kitti',#dataset name
10 | split = 'exp',#training split name
11 | height = HEIGHT,
12 | width = WIDTH,
13 | frame_ids = FRAME_IDS,
14 | in_path = '/media/sconly/harddisk/data/kitti/kitti_raw/rawdata',#path to raw data
15 | gt_depth_path = '/media/sconly/harddisk/data/kitti/kitti_raw/rawdata/gt_depths.npz',#path to gt data
16 | png = False,#image format
17 | stereo_scale = True if 's' in FRAME_IDS else False,
18 | )
19 |
20 | model = dict(
21 | name = 'mono_fm',# select a model by name
22 | depth_num_layers = DEPTH_LAYERS,
23 | pose_num_layers = POSE_LAYERS,
24 | frame_ids = FRAME_IDS,
25 | imgs_per_gpu = IMGS_PER_GPU,
26 | height = HEIGHT,
27 | width = WIDTH,
28 | scales = [0, 1, 2, 3],# output different scales of depth maps
29 | min_depth = 0.1, # minimum of predicted depth value
30 | max_depth = 100.0, # maximum of predicted depth value
31 | depth_pretrained_path = '/media/sconly/harddisk/weight/resnet/resnet{}.pth'.format(DEPTH_LAYERS),# pretrained weights for resnet
32 | pose_pretrained_path = '/media/sconly/harddisk/weight/resnet/resnet{}.pth'.format(POSE_LAYERS),# pretrained weights for resnet
33 | extractor_pretrained_path = '/media/sconly/harddisk/weight/autoencoder.pth',# pretrained weights for autoencoder
34 | automask = False if 's' in FRAME_IDS else True,
35 | disp_norm = False if 's' in FRAME_IDS else True,
36 | perception_weight = 1e-3,
37 | smoothness_weight = 1e-3,
38 | )
39 |
40 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth'# directly start training from the provided weights
41 | resume_from = None
42 | finetune = None
43 | total_epochs = 40
44 | imgs_per_gpu = IMGS_PER_GPU
45 | learning_rate = 1e-4
46 | workers_per_gpu = 4
47 | validate = True
48 |
49 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0)
50 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
51 | lr_config = dict(
52 | policy='step',
53 | warmup='linear',
54 | warmup_iters=500,
55 | warmup_ratio=1.0 / 3,
56 | step=[20,30],
57 | gamma=0.5,
58 | )
59 |
60 | checkpoint_config = dict(interval=1)
61 | log_config = dict(interval=50,
62 | hooks=[dict(type='TextLoggerHook'),])
63 | dist_params = dict(backend='nccl')
64 | log_level = 'INFO'
65 | load_from = None
66 | workflow = [('train', 1)]
--------------------------------------------------------------------------------
/config/cfg_kitti_fm_joint.py:
--------------------------------------------------------------------------------
1 | DEPTH_LAYERS = 50
2 | POSE_LAYERS = 18
3 | FRAME_IDS = [0, -1, 1, 's']
4 | IMGS_PER_GPU = 2
5 | HEIGHT = 192#320
6 | WIDTH = 640#1024
7 |
8 | data = dict(
9 | name = 'kitti',
10 | split = 'exp',
11 | height = HEIGHT,
12 | width = WIDTH,
13 | frame_ids = FRAME_IDS,
14 | in_path = '/media/user/harddisk/data/kitti/kitti_raw/rawdata',
15 | gt_depth_path = '/media/user/harddisk/data/kitti/kitti_raw/rawdata/gt_depths.npz',
16 | png = False,
17 | stereo_scale = True if 's' in FRAME_IDS else False,
18 | )
19 |
20 | model = dict(
21 | name = 'mono_fm_joint',
22 | depth_num_layers = DEPTH_LAYERS,
23 | pose_num_layers = POSE_LAYERS,
24 | frame_ids = FRAME_IDS,
25 | imgs_per_gpu = IMGS_PER_GPU,
26 | height = HEIGHT,
27 | width = WIDTH,
28 | scales = [0, 1, 2, 3],
29 | min_depth = 0.1,
30 | max_depth = 100.0,
31 | depth_pretrained_path = '/media/user/harddisk/weight/resnet/resnet{}.pth'.format(DEPTH_LAYERS),
32 | pose_pretrained_path = '/media/user/harddisk/weight/resnet/resnet{}.pth'.format(POSE_LAYERS),
33 | extractor_pretrained_path = '/media/user/harddisk/weight/autoencoder.pth',
34 | automask = False if 's' in FRAME_IDS else True,
35 | disp_norm = False if 's' in FRAME_IDS else True,
36 | dis=1e-3,
37 | cvt=1e-3,
38 | perception_weight = 1e-3,
39 | smoothness_weight = 1e-3,
40 | )
41 |
42 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth'
43 | resume_from = None
44 | finetune = None
45 | total_epochs = 40
46 | imgs_per_gpu = IMGS_PER_GPU
47 | learning_rate = 1e-4
48 | workers_per_gpu = 4
49 | validate = True
50 |
51 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0)
52 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
53 | lr_config = dict(
54 | policy='step',
55 | warmup='linear',
56 | warmup_iters=500,
57 | warmup_ratio=1.0 / 3,
58 | step=[20,30],
59 | gamma=0.5,
60 | )
61 |
62 | checkpoint_config = dict(interval=1)
63 | log_config = dict(interval=50,
64 | hooks=[dict(type='TextLoggerHook'),])
65 | dist_params = dict(backend='nccl')
66 | log_level = 'INFO'
67 | load_from = None
68 | workflow = [('train', 1)]
--------------------------------------------------------------------------------
/config/cfg_kitti_fm_refine.py:
--------------------------------------------------------------------------------
1 | DEPTH_LAYERS = 50
2 | POSE_LAYERS = 18
3 | FRAME_IDS = [0, -1, 1, 's']
4 | IMGS_PER_GPU = 2
5 | HEIGHT = 320
6 | WIDTH = 1024
7 |
8 | data = dict(
9 | name = 'kitti',
10 | split = 'test',#the split contains the list of testing data
11 | height = HEIGHT,
12 | width = WIDTH,
13 | frame_ids = FRAME_IDS,
14 | in_path = '/node01_data5/kitti_raw',#path to kitti raw data
15 | gt_depth_path = '/node01_data5/monodepth2-test/monodepth2/gt_depths.npz',#path to kitti depth ground truth
16 | png = False,
17 | stereo_scale=True if 's' in FRAME_IDS else False,
18 | )
19 |
20 | model = dict(
21 | name = 'mono_fm',
22 | depth_num_layers = DEPTH_LAYERS,
23 | pose_num_layers = POSE_LAYERS,
24 | frame_ids = FRAME_IDS,
25 | imgs_per_gpu = IMGS_PER_GPU,
26 | height = HEIGHT,
27 | width = WIDTH,
28 | scales = [0, 1, 2, 3],
29 | min_depth = 0.1,
30 | max_depth = 100.0,
31 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS),#path to pre-trained resnet weights
32 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS),#path to pre-trained resnet weights
33 | extractor_pretrained_path = '/node01/jobs/io/out/changshu/autoencoder3/epoch_30.pth',
34 | automask=False if 's' in FRAME_IDS else True,
35 | disp_norm=False if 's' in FRAME_IDS else True,
36 | perception_weight=1e-3,
37 | smoothness_weight=1e-3,
38 | )
39 |
40 | # path to the weights trained on the KITTI raw data training split
41 | resume_from = '/node01_data5/monodepth2-test/model/wow_320_1024/epoch_40.pth'# we will resume from the current epoch for further online refinement
42 | total_epochs = 60# this value must be larger than the number of epochs of the weight you resume from
43 | # for example, if you have trained 40 epochs on KITTI raw data and use that weight for resuming,
44 | # the program will start from epoch 41 and finish the remaining epochs (total_epochs - 40)
45 | imgs_per_gpu = IMGS_PER_GPU
46 | learning_rate = 1e-4
47 | workers_per_gpu = 4
48 | validate = True
49 |
50 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0)
51 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
52 | lr_config = dict(
53 | policy='step',
54 | warmup='linear',
55 | warmup_iters=500,
56 | warmup_ratio=1.0 / 3,
57 | step=[50],
58 | gamma=0.5,
59 | )
60 |
61 | checkpoint_config = dict(interval=1)
62 | log_config = dict(interval=5,
63 | hooks=[dict(type='TextLoggerHook'),])
64 | dist_params = dict(backend='nccl')
65 | log_level = 'INFO'
66 | load_from = None
67 | workflow = [('train', 1)]
--------------------------------------------------------------------------------
/config/cfg_make3d_fm.py:
--------------------------------------------------------------------------------
1 | DEPTH_LAYERS = 50
2 | POSE_LAYERS = 18
3 | FRAME_IDS = [0, -1, 1, 's']
4 | IMGS_PER_GPU = 2
5 | HEIGHT = 320
6 | WIDTH = 1024
7 | data = dict(
8 | name = 'folder',
9 | split = 'exp',
10 | height = HEIGHT,
11 | width = WIDTH,
12 | frame_ids = FRAME_IDS,
13 | in_path = '/node01_data5/monodepth2-test/make3d',
14 | gt_depth_path = '/node01_data5/monodepth2-test/monodepth2/gt_depths.npz',
15 | png = False,
16 | stereo_scale = True if 's' in FRAME_IDS else False,
17 | )
18 |
19 | model = dict(
20 | name = 'mono_fm',
21 | depth_num_layers = DEPTH_LAYERS,
22 | pose_num_layers = POSE_LAYERS,
23 | frame_ids = FRAME_IDS,
24 | imgs_per_gpu = IMGS_PER_GPU,
25 | height = HEIGHT,
26 | width = WIDTH,
27 | scales = [0, 1, 2, 3],
28 | min_depth = 0.1,
29 | max_depth = 100.0,
30 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS),
31 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS),
32 | extractor_pretrained_path = '/node01/jobs/io/out/changshu/autoencoder3/epoch_30.pth',
33 | automask = False if 's' in FRAME_IDS else True,
34 | disp_norm = False if 's' in FRAME_IDS else True,
35 | perception_weight = 1e-3,
36 | smoothness_weight = 1e-3,
37 | )
38 |
39 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth'
40 | resume_from = None
41 | finetune = None
42 | total_epochs = 40
43 | imgs_per_gpu = IMGS_PER_GPU
44 | learning_rate = 1e-4
45 | workers_per_gpu = 4
46 | validate = True
47 |
48 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0)
49 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
50 | lr_config = dict(
51 | policy='step',
52 | warmup='linear',
53 | warmup_iters=500,
54 | warmup_ratio=1.0 / 3,
55 | step=[20,30],
56 | gamma=0.5,
57 | )
58 |
59 | checkpoint_config = dict(interval=1)
60 | log_config = dict(interval=50,
61 | hooks=[dict(type='TextLoggerHook'),])
62 | dist_params = dict(backend='nccl')
63 | log_level = 'INFO'
64 | load_from = None
65 | workflow = [('train', 1)]
--------------------------------------------------------------------------------
/config/cfg_odom_fm.py:
--------------------------------------------------------------------------------
1 | DEPTH_LAYERS = 50
2 | POSE_LAYERS = 18
3 | FRAME_IDS = [0, 1, -1, 's']
4 | IMGS_PER_GPU = 2
5 | HEIGHT = 320
6 | WIDTH = 1024
7 |
8 |
9 | data = dict(
10 | name = 'kitti_odom',
11 | split = 'odom',
12 | height = HEIGHT,
13 | width = WIDTH,
14 | frame_ids = FRAME_IDS,
15 | in_path = '/node01/odo/dataset',
16 | gt_depth_path = '/node01_data5/monodepth2-test/monodepth2/gt_depths.npz',
17 | png = True,
18 | stereo_scale = True if 's' in FRAME_IDS else False,
19 | )
20 |
21 | model = dict(
22 | name = 'mono_fm',
23 | depth_num_layers = DEPTH_LAYERS,
24 | pose_num_layers = POSE_LAYERS,
25 | frame_ids = FRAME_IDS,
26 | imgs_per_gpu = IMGS_PER_GPU,
27 | height = HEIGHT,
28 | width = WIDTH,
29 | scales = [0, 1, 2, 3],
30 | min_depth = 0.1,
31 | max_depth = 100.0,
32 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS),
33 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS),
34 | extractor_pretrained_path = '/node01/jobs/io/out/changshu/autoencoder3/epoch_30.pth',
35 | automask = False if 's' in FRAME_IDS else True,
36 | disp_norm = False if 's' in FRAME_IDS else True,
37 | perception_weight=1e-3,
38 | smoothness_weight=1e-3,
39 | )
40 |
41 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth'
42 | resume_from = None
43 | finetune = None
44 | total_epochs = 40
45 | imgs_per_gpu = IMGS_PER_GPU
46 | learning_rate = 1e-4
47 | workers_per_gpu = 4
48 | validate = False
49 |
50 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0)
51 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
52 | lr_config = dict(
53 | policy='step',
54 | warmup='linear',
55 | warmup_iters=500,
56 | warmup_ratio=1.0 / 3,
57 | step=[25, 30],
58 | gamma=0.5,
59 | )
60 |
61 | checkpoint_config = dict(interval=1)
62 | log_config = dict(interval=50,
63 | hooks=[dict(type='TextLoggerHook'),])
64 | dist_params = dict(backend='nccl')
65 | log_level = 'INFO'
66 | load_from = None
67 | workflow = [('train', 1)]
--------------------------------------------------------------------------------
/mono/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/__init__.py
--------------------------------------------------------------------------------
/mono/apis/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai)
4 |
5 | from .trainer import train_mono
6 | from .env import init_dist, get_root_logger, set_random_seed
--------------------------------------------------------------------------------
/mono/apis/env.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai)
4 |
5 | import logging
6 | import os
7 | import random
8 | import subprocess
9 |
10 | import numpy as np
11 | import torch
12 | import torch.distributed as dist
13 | import torch.multiprocessing as mp
14 | from mmcv.runner import get_dist_info
15 |
16 |
17 | def init_dist(launcher, backend='nccl', **kwargs):
18 | if mp.get_start_method(allow_none=True) is None:
19 | mp.set_start_method('spawn')
20 | if launcher == 'pytorch':
21 | _init_dist_pytorch(backend, **kwargs)
22 | elif launcher == 'mpi':
23 | _init_dist_mpi(backend, **kwargs)
24 | elif launcher == 'slurm':
25 | _init_dist_slurm(backend, **kwargs)
26 | else:
27 | raise ValueError('Invalid launcher type: {}'.format(launcher))
28 |
29 |
30 | def _init_dist_pytorch(backend, **kwargs):
31 | # TODO: use local_rank instead of rank % num_gpus
32 | rank = int(os.environ['RANK'])
33 | num_gpus = torch.cuda.device_count()
34 | torch.cuda.set_device(rank % num_gpus)
35 | dist.init_process_group(backend=backend, **kwargs)
36 |
37 |
38 | def _init_dist_mpi(backend, **kwargs):
39 | raise NotImplementedError
40 |
41 |
42 | def _init_dist_slurm(backend, port=29500, **kwargs):
43 | proc_id = int(os.environ['SLURM_PROCID'])
44 | ntasks = int(os.environ['SLURM_NTASKS'])
45 | node_list = os.environ['SLURM_NODELIST']
46 | num_gpus = torch.cuda.device_count()
47 | torch.cuda.set_device(proc_id % num_gpus)
48 | addr = subprocess.getoutput(
49 | 'scontrol show hostname {} | head -n1'.format(node_list))
50 | os.environ['MASTER_PORT'] = str(port)
51 | os.environ['MASTER_ADDR'] = addr
52 | os.environ['WORLD_SIZE'] = str(ntasks)
53 | os.environ['RANK'] = str(proc_id)
54 | dist.init_process_group(backend=backend)
55 |
56 |
57 | def set_random_seed(seed):
58 | random.seed(seed)
59 | np.random.seed(seed)
60 | torch.manual_seed(seed)
61 | torch.cuda.manual_seed_all(seed)
62 |
63 |
64 | def get_root_logger(log_level=logging.INFO):
65 | logger = logging.getLogger()
66 | if not logger.hasHandlers():
67 | logging.basicConfig(
68 | format='%(asctime)s - %(levelname)s - %(message)s',
69 | level=log_level)
70 | rank, _ = get_dist_info()
71 | if rank != 0:
72 | logger.setLevel('ERROR')
73 | return logger
74 |
--------------------------------------------------------------------------------
/mono/core/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai)
4 |
5 | from .evaluation import NonDistEvalHook, DistEvalMonoHook
6 | from .utils import DistOptimizerHook
--------------------------------------------------------------------------------
/mono/core/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai)
4 |
5 | from .eval_hooks import NonDistEvalHook, DistEvalMonoHook
--------------------------------------------------------------------------------
/mono/core/evaluation/pixel_error.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai)
4 |
5 | import numpy as np
6 |
7 | class AverageMeter(object):
8 | """
9 | Computes and stores the average and current value
10 | """
11 | def __init__(self):
12 | self.reset()
13 |
14 | def reset(self):
15 | self.val=0
16 | self.avg=0
17 | self.sum=0
18 | self.count=0
19 |
20 | def update(self, val, n=1):
21 | self.val=val
22 | self.sum+=val*n
23 | self.count+=n
24 | self.avg=self.sum/self.count
25 |
26 |
27 | def compute_errors(gt, pred):
28 | """Computation of error metrics between predicted and ground truth depths
29 | """
30 | thresh = np.maximum((gt / pred), (pred / gt))
31 | a1 = (thresh < 1.25 ).mean()
32 | a2 = (thresh < 1.25 ** 2).mean()
33 | a3 = (thresh < 1.25 ** 3).mean()
34 | rmse = (gt - pred) ** 2
35 | rmse = np.sqrt(rmse.mean())
36 | rmse_log = (np.log(gt) - np.log(pred)) ** 2
37 | rmse_log = np.sqrt(rmse_log.mean())
38 | abs_rel = np.mean(np.abs(gt - pred) / gt)
39 | sq_rel = np.mean(((gt - pred) ** 2) / gt)
40 | return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3
41 |
42 |
43 | def disp_to_depth(disp, min_depth = 0.1, max_depth=100):
44 | min_disp = 1 / max_depth #0.01
45 | max_disp = 1 / min_depth #10
46 | scaled_disp = min_disp + (max_disp - min_disp) * disp #(10-0.01)*disp+0.01
47 | depth = 1 / scaled_disp
48 | return scaled_disp, depth
49 |
50 |
--------------------------------------------------------------------------------
/mono/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai)
4 |
5 | from .dist_utils import allreduce_grads, DistOptimizerHook
6 | from .misc import tensor2imgs, unmap, multi_apply
7 |
8 | __all__ = [
9 | 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap',
10 | 'multi_apply'
11 | ]
12 |
--------------------------------------------------------------------------------
/mono/core/utils/dist_utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai)
4 | from collections import OrderedDict
5 |
6 | import torch.distributed as dist
7 | from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors,
8 | _take_tensors)
9 | from mmcv.runner import OptimizerHook
10 |
11 |
12 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
13 | if bucket_size_mb > 0:
14 | bucket_size_bytes = bucket_size_mb * 1024 * 1024
15 | buckets = _take_tensors(tensors, bucket_size_bytes)
16 | else:
17 | buckets = OrderedDict()
18 | for tensor in tensors:
19 | tp = tensor.type()
20 | if tp not in buckets:
21 | buckets[tp] = []
22 | buckets[tp].append(tensor)
23 | buckets = buckets.values()
24 |
25 | for bucket in buckets:
26 | flat_tensors = _flatten_dense_tensors(bucket)
27 | dist.all_reduce(flat_tensors)
28 | flat_tensors.div_(world_size)
29 | for tensor, synced in zip(
30 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)):
31 | tensor.copy_(synced)
32 |
33 |
34 | def allreduce_grads(model, coalesce=True, bucket_size_mb=-1):
35 | grads = [
36 | param.grad.data for param in model.parameters()
37 | if param.requires_grad and param.grad is not None
38 | ]
39 | world_size = dist.get_world_size()
40 | if coalesce:
41 | _allreduce_coalesced(grads, world_size, bucket_size_mb)
42 | else:
43 | for tensor in grads:
44 | dist.all_reduce(tensor.div_(world_size))
45 |
46 |
47 | class DistOptimizerHook(OptimizerHook):
48 |
49 | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1):
50 | self.grad_clip = grad_clip
51 | self.coalesce = coalesce
52 | self.bucket_size_mb = bucket_size_mb
53 |
54 | def after_train_iter(self, runner):
55 | runner.optimizer.zero_grad()
56 | runner.outputs['loss'].backward()
57 | allreduce_grads(runner.model, self.coalesce, self.bucket_size_mb)
58 | if self.grad_clip is not None:
59 | self.clip_grads(runner.model.parameters())
60 | runner.optimizer.step()
61 |
--------------------------------------------------------------------------------
/mono/core/utils/misc.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai)
4 |
5 | from functools import partial
6 |
7 | import mmcv
8 | import numpy as np
9 | from six.moves import map, zip
10 |
11 |
12 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
13 | num_imgs = tensor.size(0)
14 | mean = np.array(mean, dtype=np.float32)
15 | std = np.array(std, dtype=np.float32)
16 | imgs = []
17 | for img_id in range(num_imgs):
18 | img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0)
19 | img = mmcv.imdenormalize(
20 | img, mean, std, to_bgr=to_rgb).astype(np.uint8)
21 | imgs.append(np.ascontiguousarray(img))
22 | return imgs
23 |
24 |
25 | def multi_apply(func, *args, **kwargs):
26 | pfunc = partial(func, **kwargs) if kwargs else func
27 | map_results = map(pfunc, *args)
28 | return tuple(map(list, zip(*map_results)))
29 |
30 |
31 | def unmap(data, count, inds, fill=0):
32 | """ Unmap a subset of item (data) back to the original set of items (of
33 | size count) """
34 | if data.dim() == 1:
35 | ret = data.new_full((count, ), fill)
36 | ret[inds] = data
37 | else:
38 | new_size = (count, ) + data.size()[1:]
39 | ret = data.new_full(new_size, fill)
40 | ret[inds, :] = data
41 | return ret
42 |
--------------------------------------------------------------------------------
/mono/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .loader import build_dataloader
--------------------------------------------------------------------------------
/mono/datasets/eth3d_dataset.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | import random
3 | import numpy as np
4 | from PIL import Image # using pillow-simd for increased speed
5 | import os
6 |
7 | import torch
8 | import torch.utils.data as data
9 | from torchvision import transforms
10 |
11 |
12 | def pil_loader(filename):
13 | # open path as file to avoid ResourceWarning
14 | # (https://github.com/python-pillow/Pillow/issues/835)
15 | with open(filename, 'rb') as f:
16 | with Image.open(f) as img:
17 | return img.convert('RGB')
18 |
19 |
20 | class FolderDataset(data.Dataset):
21 | """Superclass for monocular dataloaders
22 |
23 | Args:
24 | data_path
25 | filenames
26 | height
27 | width
28 | frame_idxs
29 | num_scales
30 | is_train
31 | img_ext
32 | """
33 | def __init__(self,
34 | data_path,
35 | filenames,
36 | height,
37 | width,
38 | frame_idxs,
39 | is_train=False,
40 | img_ext='.jpg',
41 | gt_depth_path = None):
42 | super(FolderDataset, self).__init__()
43 |
44 | self.data_path = data_path
45 | self.filenames = sorted(os.listdir(os.path.join(data_path, 'rgb')))[1:-2]
46 | self.height = height
47 | self.width = width
48 | self.interp = Image.ANTIALIAS
49 | self.is_train = is_train
50 | self.frame_idxs = frame_idxs
51 | self.loader = pil_loader
52 | self.to_tensor = transforms.ToTensor()
53 | #726.28741455078 726.28741455078 354.6496887207 186.46566772461
54 | #w=739,h=458
55 | self.K = np.array([[0.9832, 0, 0.5, 0],
56 | [0, 1.58578, 0.5, 0],
57 | [0, 0, 1, 0],
58 | [0, 0, 0, 1]], dtype=np.float32)
59 |
60 | # Need to specify augmentations differently in pytorch 1.0 compared with 0.4
61 | if int(torch.__version__.split('.')[0]) > 0:
62 | self.brightness = (0.8, 1.2)
63 | self.contrast = (0.8, 1.2)
64 | self.saturation = (0.8, 1.2)
65 | self.hue = (-0.1, 0.1)
66 | else:
67 | self.brightness = 0.2
68 | self.contrast = 0.2
69 | self.saturation = 0.2
70 | self.hue = 0.1
71 |
72 | self.resize = transforms.Resize((self.height, self.width), interpolation=self.interp)
73 |
74 | self.flag = np.zeros(self.__len__(), dtype=np.int64)
75 |
76 | def preprocess(self, inputs, color_aug):
77 | """Resize colour images to the required scales and augment if required
78 |
79 | We create the color_aug object in advance and apply the same augmentation to all
80 | images in this item. This ensures that all images input to the pose network receive the
81 | same augmentation.
82 | """
83 | for k in list(inputs):
84 | if "color" in k:
85 | n, im, i = k
86 | inputs[(n, im, 0)] = self.resize(inputs[(n, im, - 1)])
87 |
88 | for k in list(inputs):
89 | if "color" in k:
90 | f = inputs[k]
91 | n, im, i = k
92 | inputs[(n, im, i)] = self.to_tensor(f)
93 | if i == 0:
94 | inputs[(n + "_aug", im, i)] = self.to_tensor(color_aug(f))
95 |
96 | def __len__(self):
97 | return len(self.filenames)-2
98 |
99 | def __getitem__(self, index):
100 | """Returns a single training item from the dataset as a dictionary.
101 |
102 | Values correspond to torch tensors.
103 | Keys in the dictionary are either strings or tuples:
104 |
105 | ("color", , ) for raw colour images,
106 | ("color_aug", , ) for augmented colour images,
107 | ("K", scale) or ("inv_K", scale) for camera intrinsics,
108 | "stereo_T" for camera extrinsics, and
109 | "depth_gt" for ground truth depth maps.
110 |
111 | <frame_id> is either:
112 | an integer (e.g. 0, -1, or 1) representing the temporal step relative to 'index',
113 | or
114 | "s" for the opposite image in the stereo pair.
115 |
116 | <scale> is an integer representing the scale of the image relative to the fullsize image:
117 | -1 images at native resolution as loaded from disk
118 | 0 images resized to (self.width, self.height )
119 | 1 images resized to (self.width // 2, self.height // 2)
120 | 2 images resized to (self.width // 4, self.height // 4)
121 | 3 images resized to (self.width // 8, self.height // 8)
122 | """
123 | inputs = {}
124 |
125 | index = index+1
126 |
127 | do_color_aug = self.is_train and random.random() > 0.5
128 | do_flip = self.is_train and random.random() > 0.5
129 |
130 | for i in self.frame_idxs:
131 | if i=='s':
132 | filename = os.path.join('rgb2', self.filenames[index])
133 | else:
134 | filename = os.path.join('rgb', self.filenames[index+i])
135 |
136 | inputs[("color", i, -1)] = self.get_color(filename, do_flip)
137 |
138 | # adjusting intrinsics to match each scale in the pyramid
139 | K = self.K.copy()
140 | K[0, :] *= self.width
141 | K[1, :] *= self.height
142 | inv_K = np.linalg.pinv(K)
143 |
144 | inputs[("K")] = torch.from_numpy(K)
145 | inputs[("inv_K")] = torch.from_numpy(inv_K)
146 |
147 | if do_color_aug:
148 | color_aug = transforms.ColorJitter.get_params(self.brightness, self.contrast, self.saturation, self.hue)
149 | else:
150 | color_aug = (lambda x: x)
151 |
152 | self.preprocess(inputs, color_aug)
153 |
154 | for i in self.frame_idxs:
155 | del inputs[("color", i, -1)]
156 |
157 | if "s" in self.frame_idxs:
158 | stereo_T = np.eye(4, dtype=np.float32)
159 | baseline_sign = -1 if do_flip else 1
160 | side_sign = -1
161 | stereo_T[0, 3] = side_sign * baseline_sign * 0.1
162 | inputs["stereo_T"] = torch.from_numpy(stereo_T)
163 |
164 | return inputs
165 |
166 | def get_color(self, filename, do_flip):
167 | color = self.loader(os.path.join(self.data_path, filename))
168 |
169 | if do_flip:
170 | color = color.transpose(Image.FLIP_LEFT_RIGHT)
171 |
172 | return color
--------------------------------------------------------------------------------
/mono/datasets/euroc_dataset.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | import random
3 | import numpy as np
4 | from PIL import Image # using pillow-simd for increased speed
5 | import os
6 |
7 | import torch
8 | import torch.utils.data as data
9 | from torchvision import transforms
10 |
11 |
12 | def pil_loader(filename):
13 | # open path as file to avoid ResourceWarning
14 | # (https://github.com/python-pillow/Pillow/issues/835)
15 | with open(filename, 'rb') as f:
16 | with Image.open(f) as img:
17 | return img.convert('RGB')
18 |
19 |
20 | class FolderDataset(data.Dataset):
21 | """Superclass for monocular dataloaders
22 |
23 | Args:
24 | data_path
25 | filenames
26 | height
27 | width
28 | frame_idxs
29 | num_scales
30 | is_train
31 | img_ext
32 | """
33 | def __init__(self,
34 | data_path,
35 | filenames,
36 | height,
37 | width,
38 | frame_idxs,
39 | is_train=False,
40 | img_ext='.jpg',
41 | gt_depth_path = None):
42 | super(FolderDataset, self).__init__()
43 |
44 | self.data_path = data_path
45 | # self.filenames = sorted(os.listdir(os.path.join(data_path, 'cam0', 'data')))[1:-2]#420-1940
46 | self.filenames = sorted(os.listdir(os.path.join(data_path, 'cam0', 'data'))) # 420-1940
47 | self.height = height
48 | self.width = width
49 | self.interp = Image.ANTIALIAS
50 | self.is_train = is_train
51 | self.frame_idxs = frame_idxs
52 | self.loader = pil_loader
53 | self.to_tensor = transforms.ToTensor()
54 |
55 | fx = 435.2047
56 | fy = 435.2047
57 | w = 752
58 | h = 480
59 | self.K = np.array([[fx/w, 0, 0.5, 0],
60 | [0, fy/h, 0.5, 0],
61 | [0, 0, 1, 0],
62 | [0, 0, 0, 1]], dtype=np.float32)
63 |
64 | # Need to specify augmentations differently in pytorch 1.0 compared with 0.4
65 | if int(torch.__version__.split('.')[0]) > 0:
66 | self.brightness = (0.8, 1.2)
67 | self.contrast = (0.8, 1.2)
68 | self.saturation = (0.8, 1.2)
69 | self.hue = (-0.1, 0.1)
70 | else:
71 | self.brightness = 0.2
72 | self.contrast = 0.2
73 | self.saturation = 0.2
74 | self.hue = 0.1
75 |
76 | self.resize = transforms.Resize((self.height, self.width), interpolation=self.interp)
77 |
78 | self.flag = np.zeros(self.__len__(), dtype=np.int64)
79 |
80 | def preprocess(self, inputs, color_aug):
81 | """Resize colour images to the required scales and augment if required
82 |
83 | We create the color_aug object in advance and apply the same augmentation to all
84 | images in this item. This ensures that all images input to the pose network receive the
85 | same augmentation.
86 | """
87 | for k in list(inputs):
88 | if "color" in k:
89 | n, im, i = k
90 | inputs[(n, im, 0)] = self.resize(inputs[(n, im, - 1)])
91 |
92 | for k in list(inputs):
93 | if "color" in k:
94 | f = inputs[k]
95 | n, im, i = k
96 | inputs[(n, im, i)] = self.to_tensor(f)
97 | if i == 0:
98 | inputs[(n + "_aug", im, i)] = self.to_tensor(color_aug(f))
99 |
100 | def __len__(self):
101 | return len(self.filenames)-1
102 |
103 | def __getitem__(self, index):
104 | """Returns a single training item from the dataset as a dictionary.
105 |
106 | Values correspond to torch tensors.
107 | Keys in the dictionary are either strings or tuples:
108 |
109 | ("color", , ) for raw colour images,
110 | ("color_aug", , ) for augmented colour images,
111 | ("K", scale) or ("inv_K", scale) for camera intrinsics,
112 | "stereo_T" for camera extrinsics, and
113 | "depth_gt" for ground truth depth maps.
114 |
115 | is either:
116 | an integer (e.g. 0, -1, or 1) representing the temporal step relative to 'index',
117 | or
118 | "s" for the opposite image in the stereo pair.
119 |
120 | is an integer representing the scale of the image relative to the fullsize image:
121 | -1 images at native resolution as loaded from disk
122 | 0 images resized to (self.width, self.height )
123 | 1 images resized to (self.width // 2, self.height // 2)
124 | 2 images resized to (self.width // 4, self.height // 4)
125 | 3 images resized to (self.width // 8, self.height // 8)
126 | """
127 | inputs = {}
128 |
129 | do_color_aug = self.is_train and random.random() > 0.5
130 | do_flip = self.is_train and random.random() > 0.5
131 |
132 | for i in self.frame_idxs:
133 | if i=='s':
134 | filename = os.path.join('cam1', 'data', self.filenames[index])
135 | else:
136 | filename = os.path.join('cam0', 'data', self.filenames[index+i])
137 |
138 | inputs[("color", i, -1)] = self.get_color(filename, do_flip)
139 |
140 | # adjusting intrinsics to match each scale in the pyramid
141 | K = self.K.copy()
142 | K[0, :] *= self.width
143 | K[1, :] *= self.height
144 | inv_K = np.linalg.pinv(K)
145 |
146 | inputs[("K")] = torch.from_numpy(K)
147 | inputs[("inv_K")] = torch.from_numpy(inv_K)
148 |
149 | if do_color_aug:
150 | color_aug = transforms.ColorJitter.get_params(self.brightness, self.contrast, self.saturation, self.hue)
151 | else:
152 | color_aug = (lambda x: x)
153 |
154 | self.preprocess(inputs, color_aug)
155 |
156 | for i in self.frame_idxs:
157 | del inputs[("color", i, -1)]
158 |
159 | if "s" in self.frame_idxs:
160 | stereo_T = np.eye(4, dtype=np.float32)
161 | baseline_sign = -1 if do_flip else 1
162 | side_sign = -1
163 | stereo_T[0, 3] = side_sign * baseline_sign * 0.1
164 | inputs["stereo_T"] = torch.from_numpy(stereo_T)
165 |
166 | return inputs
167 |
168 | def get_color(self, filename, do_flip):
169 | color = self.loader(os.path.join(self.data_path, filename))
170 |
171 | if do_flip:
172 | color = color.transpose(Image.FLIP_LEFT_RIGHT)
173 |
174 | return color
--------------------------------------------------------------------------------
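
The EuRoC loader above keeps its intrinsics normalized: the cam0 focal length (435.2047 px) is divided by the native 752x480 sensor size and the principal point is pinned to the image centre (0.5 in normalized units), so the same matrix survives any resize. A minimal sketch of the rescaling that __getitem__ performs, using an assumed example resolution rather than the repo's configured one:

import numpy as np

# EuRoC cam0 intrinsics as stored above, normalized by the 752x480 sensor size.
K_norm = np.array([[435.2047 / 752, 0, 0.5, 0],
                   [0, 435.2047 / 480, 0.5, 0],
                   [0, 0, 1, 0],
                   [0, 0, 0, 1]], dtype=np.float32)

height, width = 256, 320          # assumed example training resolution
K = K_norm.copy()
K[0, :] *= width                  # fx, cx back to pixels of the resized image
K[1, :] *= height                 # fy, cy likewise
inv_K = np.linalg.pinv(K)
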
/mono/datasets/folder_dataset.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | import random
3 | import numpy as np
4 | from PIL import Image # using pillow-simd for increased speed
5 | import os
6 |
7 | import torch
8 | import torch.utils.data as data
9 | from torchvision import transforms
10 |
11 |
12 | def pil_loader(filename):
13 | # open path as file to avoid ResourceWarning
14 | # (https://github.com/python-pillow/Pillow/issues/835)
15 | with open(filename, 'rb') as f:
16 | with Image.open(f) as img:
17 | return img.convert('RGB')
18 |
19 |
20 | class FolderDataset(data.Dataset):
21 | """Superclass for monocular dataloaders
22 |
23 | Args:
24 | data_path
25 | filenames
26 | height
27 | width
28 | frame_idxs
29 | num_scales
30 | is_train
31 | img_ext
32 | """
33 | def __init__(self,
34 | data_path,
35 | filenames,
36 | height,
37 | width,
38 | frame_idxs,
39 | is_train=False,
40 | img_ext='.jpg',
41 | gt_depth_path = None):
42 | super(FolderDataset, self).__init__()
43 |
44 | self.data_path = data_path
45 | self.filenames = sorted(os.listdir(data_path))
46 | self.height = height
47 | self.width = width
48 | self.interp = Image.ANTIALIAS
49 | self.is_train = is_train
50 | self.frame_idxs = frame_idxs
51 | self.loader = pil_loader
52 | self.to_tensor = transforms.ToTensor()
53 | self.K = np.array([[0.9765, 0, 0.5, 0],
54 | [0, 1.736, 0.5, 0],
55 | [0, 0, 1, 0],
56 | [0, 0, 0, 1]], dtype=np.float32)
57 |
58 | # Need to specify augmentations differently in pytorch 1.0 compared with 0.4
59 | if int(torch.__version__.split('.')[0]) > 0:
60 | self.brightness = (0.8, 1.2)
61 | self.contrast = (0.8, 1.2)
62 | self.saturation = (0.8, 1.2)
63 | self.hue = (-0.1, 0.1)
64 | else:
65 | self.brightness = 0.2
66 | self.contrast = 0.2
67 | self.saturation = 0.2
68 | self.hue = 0.1
69 |
70 | self.resize = transforms.Resize((self.height, self.width), interpolation=self.interp)
71 |
72 | self.flag = np.zeros(self.__len__(), dtype=np.int64)
73 |
74 | def preprocess(self, inputs, color_aug):
75 | """Resize colour images to the required scales and augment if required
76 |
77 | We create the color_aug object in advance and apply the same augmentation to all
78 | images in this item. This ensures that all images input to the pose network receive the
79 | same augmentation.
80 | """
81 | for k in list(inputs):
82 | if "color" in k:
83 | n, im, i = k
84 | inputs[(n, im, 0)] = self.resize(inputs[(n, im, - 1)])
85 |
86 | for k in list(inputs):
87 | if "color" in k:
88 | f = inputs[k]
89 | n, im, i = k
90 | inputs[(n, im, i)] = self.to_tensor(f)
91 | if i == 0:
92 | inputs[(n + "_aug", im, i)] = self.to_tensor(color_aug(f))
93 |
94 | def __len__(self):
95 | return len(self.filenames)
96 |
97 | def __getitem__(self, index):
98 | """Returns a single training item from the dataset as a dictionary.
99 |
100 | Values correspond to torch tensors.
101 | Keys in the dictionary are either strings or tuples:
102 |
103 | ("color", , ) for raw colour images,
104 | ("color_aug", , ) for augmented colour images,
105 | ("K", scale) or ("inv_K", scale) for camera intrinsics,
106 | "stereo_T" for camera extrinsics, and
107 | "depth_gt" for ground truth depth maps.
108 |
109 | is either:
110 | an integer (e.g. 0, -1, or 1) representing the temporal step relative to 'index',
111 | or
112 | "s" for the opposite image in the stereo pair.
113 |
114 | is an integer representing the scale of the image relative to the fullsize image:
115 | -1 images at native resolution as loaded from disk
116 | 0 images resized to (self.width, self.height )
117 | 1 images resized to (self.width // 2, self.height // 2)
118 | 2 images resized to (self.width // 4, self.height // 4)
119 | 3 images resized to (self.width // 8, self.height // 8)
120 | """
121 | inputs = {}
122 |
123 | do_color_aug = self.is_train and random.random() > 0.5
124 | do_flip = self.is_train and random.random() > 0.5
125 |
126 | for i in self.frame_idxs:
127 | try:
128 | filename = self.filenames[index+i]
129 | except:
130 | filename = self.filenames[index]
131 |
132 | inputs[("color", i, -1)] = self.get_color(filename, do_flip)
133 |
134 | # adjusting intrinsics to match each scale in the pyramid
135 | K = self.K.copy()
136 | K[0, :] *= self.width
137 | K[1, :] *= self.height
138 | inv_K = np.linalg.pinv(K)
139 |
140 | inputs[("K", 0)] = torch.from_numpy(K)
141 | inputs[("inv_K", 0)] = torch.from_numpy(inv_K)
142 |
143 | if do_color_aug:
144 | color_aug = transforms.ColorJitter.get_params(self.brightness, self.contrast, self.saturation, self.hue)
145 | else:
146 | color_aug = (lambda x: x)
147 |
148 | self.preprocess(inputs, color_aug)
149 |
150 | for i in self.frame_idxs:
151 | del inputs[("color", i, -1)]
152 |
153 | return inputs
154 |
155 | def get_color(self, filename, do_flip):
156 | color = self.loader(os.path.join(self.data_path, filename))
157 |
158 | if do_flip:
159 | color = color.transpose(Image.FLIP_LEFT_RIGHT)
160 |
161 | return color
--------------------------------------------------------------------------------
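
A hypothetical usage sketch for the FolderDataset above, showing the tuple keys each batch carries; the folder path and resolution are placeholders, and the filenames argument is ignored because the class lists data_path itself:

from torch.utils.data import DataLoader
from mono.datasets.folder_dataset import FolderDataset

dataset = FolderDataset('/path/to/images',      # placeholder folder of frames
                        filenames=None,          # ignored: data_path is listed internally
                        height=256, width=320,   # placeholder resolution
                        frame_idxs=[0, -1, 1],
                        is_train=True)
loader = DataLoader(dataset, batch_size=4, shuffle=True, drop_last=True)

batch = next(iter(loader))
print(batch[("color_aug", 0, 0)].shape)   # torch.Size([4, 3, 256, 320]) target frame, augmented
print(batch[("color", -1, 0)].shape)      # previous frame, resized but not augmented
print(batch[("K", 0)].shape)              # torch.Size([4, 4, 4]) per-sample intrinsics
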
/mono/datasets/get_dataset.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai)
4 |
5 | import os
6 | from .utils import readlines, sec_to_hm_str
7 |
8 |
9 | def get_dataset(cfg, training=True):
10 | dataset_name = cfg['name']
11 | if dataset_name == 'kitti':
12 | from .kitti_dataset import KITTIRAWDataset as dataset
13 | elif dataset_name == 'kitti_odom':
14 | from .kitti_dataset import KITTIOdomDataset as dataset
15 | elif dataset_name == 'cityscape':
16 | from .cityscape_dataset import CityscapeDataset as dataset
17 | elif dataset_name == 'folder':
18 | from .folder_dataset import FolderDataset as dataset
19 | elif dataset_name == 'eth3d':
20 | from .eth3d_dataset import FolderDataset as dataset
21 | elif dataset_name == 'euroc':
22 | from .euroc_dataset import FolderDataset as dataset
23 |
24 | fpath = os.path.join(os.path.dirname(__file__), "splits", cfg.split, "{}_files.txt")
25 | filenames = readlines(fpath.format("train")) if training else readlines(fpath.format('val'))
26 | img_ext = '.png' if cfg.png == True else '.jpg'
27 |
28 | dataset = dataset(cfg.in_path,
29 | filenames,
30 | cfg.height,
31 | cfg.width,
32 | cfg.frame_ids if training else [0],
33 | is_train=training,
34 | img_ext=img_ext,
35 | gt_depth_path=cfg.gt_depth_path)
36 | return dataset
--------------------------------------------------------------------------------
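
A sketch of driving the factory above from an mmcv config. The field names follow exactly what get_dataset reads (name, split, in_path, height, width, frame_ids, png, gt_depth_path); the concrete values here are placeholders, not the defaults from the cfg_*.py files:

from mmcv import Config
from mono.datasets.get_dataset import get_dataset

cfg = Config(dict(data=dict(name='kitti',
                            split='exp',
                            in_path='/path/to/kitti_raw',   # placeholder
                            height=256, width=800,          # placeholder resolution
                            frame_ids=[0, -1, 1],
                            png=False,
                            gt_depth_path=None)))

train_set = get_dataset(cfg.data, training=True)    # reads splits/exp/train_files.txt
val_set = get_dataset(cfg.data, training=False)     # reads splits/exp/val_files.txt, frame_ids -> [0]
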
/mono/datasets/kitti_dataset.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import os
4 | import scipy.misc
5 | import numpy as np
6 | import PIL.Image as pil
7 | import datetime
8 |
9 | from .kitti_utils import generate_depth_map, read_calib_file, transform_from_rot_trans, pose_from_oxts_packet
10 | from .mono_dataset import MonoDataset
11 |
12 |
13 | class KITTIDataset(MonoDataset):
14 | """Superclass for different types of KITTI dataset loaders
15 | """
16 | def __init__(self, *args, **kwargs):
17 | super(KITTIDataset, self).__init__(*args, **kwargs)
18 |
19 | self.K = np.array([[0.58, 0, 0.5, 0],
20 | [0, 1.92, 0.5, 0],
21 | [0, 0, 1, 0],
22 | [0, 0, 0, 1]], dtype=np.float32)
23 |
24 | self.full_res_shape = (1242, 375)
25 | self.side_map = {"2": 2, "3": 3, "l": 2, "r": 3}
26 |
27 | def check_depth(self):
28 | line = self.filenames[0].split()
29 | scene_name = line[0]
30 | frame_index = int(line[1])
31 |
32 | velo_filename = os.path.join(
33 | self.data_path,
34 | scene_name,
35 | "velodyne_points/data/{:010d}.bin".format(int(frame_index)))
36 |
37 | return os.path.isfile(velo_filename)
38 |
39 | def get_color(self, folder, frame_index, side, do_flip):
40 | color = self.loader(self.get_image_path(folder, frame_index, side))
41 |
42 | if do_flip:
43 | color = color.transpose(pil.FLIP_LEFT_RIGHT)
44 |
45 | return color
46 |
47 |
48 | class KITTIRAWDataset(KITTIDataset):
49 | """KITTI dataset which loads the original velodyne depth maps for ground truth
50 | """
51 | def __init__(self, *args, **kwargs):
52 | super(KITTIRAWDataset, self).__init__(*args, **kwargs)
53 |
54 | def get_image_path(self, folder, frame_index, side):
55 | f_str = "{:010d}{}".format(frame_index, self.img_ext)
56 | image_path = os.path.join(
57 | self.data_path, folder, "image_0{}/data".format(self.side_map[side]), f_str)
58 | return image_path
59 |
60 | def get_depth(self, folder, frame_index, side, do_flip):
61 | calib_path = os.path.join(self.data_path, folder.split("/")[0])
62 |
63 | velo_filename = os.path.join(
64 | self.data_path,
65 | folder,
66 | "velodyne_points/data/{:010d}.bin".format(int(frame_index)))
67 |
68 | depth_gt = generate_depth_map(calib_path, velo_filename, self.side_map[side])
69 | depth_gt = scipy.misc.imresize(depth_gt, self.full_res_shape[::-1], "nearest")
70 |
71 | if do_flip:
72 | depth_gt = np.fliplr(depth_gt)
73 |
74 | return depth_gt
75 |
76 | def get_pose(self, folder, frame_index, offset):
77 | oxts_root = os.path.join(self.data_path, folder, 'oxts')
78 | with open(os.path.join(oxts_root, 'timestamps.txt')) as f:
79 | timestamps = np.array([datetime.datetime.strptime(ts[:-3], "%Y-%m-%d %H:%M:%S.%f").timestamp()
80 | for ts in f.read().splitlines()])
81 |
82 | speed0 = np.genfromtxt(os.path.join(oxts_root, 'data', '{:010d}.txt'.format(frame_index)))[[8, 9, 10]]
83 | # speed1 = np.genfromtxt(os.path.join(oxts_root, 'data', '{:010d}.txt'.format(frame_index+offset)))[[8, 9, 10]]
84 |
85 | timestamp0 = timestamps[frame_index]
86 | timestamp1 = timestamps[frame_index+offset]
87 | # displacement = 0.5 * (speed0 + speed1) * (timestamp1 - timestamp0)
88 | displacement = speed0 * (timestamp1 - timestamp0)
89 |
90 | imu2velo = read_calib_file(os.path.join(self.data_path, os.path.dirname(folder), 'calib_imu_to_velo.txt'))
91 | velo2cam = read_calib_file(os.path.join(self.data_path, os.path.dirname(folder), 'calib_velo_to_cam.txt'))
92 | cam2cam = read_calib_file(os.path.join(self.data_path, os.path.dirname(folder), 'calib_cam_to_cam.txt'))
93 |
94 | velo2cam_mat = transform_from_rot_trans(velo2cam['R'], velo2cam['T'])
95 | imu2velo_mat = transform_from_rot_trans(imu2velo['R'], imu2velo['T'])
96 | cam_2rect_mat = transform_from_rot_trans(cam2cam['R_rect_00'], np.zeros(3))
97 |
98 | imu2cam = cam_2rect_mat @ velo2cam_mat @ imu2velo_mat
99 |
100 | odo_pose = imu2cam[:3,:3] @ displacement + imu2cam[:3,3]
101 |
102 | return odo_pose
103 |
104 |
105 | class KITTIOdomDataset(KITTIDataset):
106 | """KITTI dataset for odometry training and testing
107 | """
108 | def __init__(self, *args, **kwargs):
109 | super(KITTIOdomDataset, self).__init__(*args, **kwargs)
110 |
111 | def get_image_path(self, folder, frame_index, side):
112 | f_str = "{:06d}{}".format(frame_index, self.img_ext)
113 | side_map = {"l": 0, "r": 1}
114 | image_path = os.path.join(
115 | self.data_path,
116 | "sequences/{:02d}".format(int(folder)),
117 | "image_{}".format(side_map[side]),
118 | f_str)
119 | return image_path
120 |
121 |
122 | class KITTIDepthDataset(KITTIDataset):
123 | """KITTI dataset which uses the updated ground truth depth maps
124 | """
125 | def __init__(self, *args, **kwargs):
126 | super(KITTIDepthDataset, self).__init__(*args, **kwargs)
127 |
128 | def get_image_path(self, folder, frame_index, side):
129 | f_str = "{:010d}{}".format(frame_index, self.img_ext)
130 | image_path = os.path.join(
131 | self.data_path,
132 | folder,
133 | "image_0{}/data".format(self.side_map[side]),
134 | f_str)
135 | return image_path
136 |
137 | def get_depth(self, folder, frame_index, side, do_flip):
138 | f_str = "{:010d}.png".format(frame_index)
139 | depth_path = os.path.join(
140 | self.data_path,
141 | folder,
142 | "proj_depth/groundtruth/image_0{}".format(self.side_map[side]),
143 | f_str)
144 |
145 | depth_gt = pil.open(depth_path)
146 | depth_gt = depth_gt.resize(self.full_res_shape, pil.NEAREST)
147 | depth_gt = np.array(depth_gt).astype(np.float32) / 256
148 |
149 | if do_flip:
150 | depth_gt = np.fliplr(depth_gt)
151 |
152 | return depth_gt
153 |
--------------------------------------------------------------------------------
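
Purely illustrative: how KITTIRAWDataset.get_image_path turns a split line such as "2011_09_26/2011_09_26_drive_0001_sync 42 l" into an image path relative to data_path (side_map sends "l"/"r" to image_02/image_03):

import os

folder = "2011_09_26/2011_09_26_drive_0001_sync"
frame_index, side = 42, "l"
side_map = {"2": 2, "3": 3, "l": 2, "r": 3}

f_str = "{:010d}{}".format(frame_index, ".jpg")
image_path = os.path.join(folder, "image_0{}/data".format(side_map[side]), f_str)
print(image_path)  # 2011_09_26/2011_09_26_drive_0001_sync/image_02/data/0000000042.jpg
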
/mono/datasets/kitti_utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai)
4 |
5 | from __future__ import absolute_import, division, print_function
6 |
7 | import os
8 | import numpy as np
9 | from collections import Counter
10 |
11 |
12 | def load_velodyne_points(filename):
13 | """Load 3D point cloud from KITTI file format
14 | (adapted from https://github.com/hunse/kitti)
15 | """
16 | points = np.fromfile(filename, dtype=np.float32).reshape(-1, 4)
17 | points[:, 3] = 1.0 # homogeneous
18 | return points
19 |
20 |
21 | def read_calib_file(path):
22 | """Read KITTI calibration file
23 | (from https://github.com/hunse/kitti)
24 | """
25 | float_chars = set("0123456789.e+- ")
26 | data = {}
27 | with open(path, 'r') as f:
28 | for line in f.readlines():
29 | key, value = line.split(':', 1)
30 | value = value.strip()
31 | data[key] = value
32 | if float_chars.issuperset(value):
33 | # try to cast to float array
34 | try:
35 | data[key] = np.array(list(map(float, value.split(' '))))
36 | except ValueError:
37 | # casting error: data[key] already eq. value, so pass
38 | pass
39 |
40 | return data
41 |
42 |
43 | def sub2ind(matrixSize, rowSub, colSub):
44 | """Convert row, col matrix subscripts to linear indices
45 | """
46 | m, n = matrixSize
47 | return rowSub * (n-1) + colSub - 1
48 |
49 |
50 | def generate_depth_map(calib_dir, velo_filename, cam=2, vel_depth=False):
51 | """Generate a depth map from velodyne data
52 | """
53 | # load calibration files
54 | cam2cam = read_calib_file(os.path.join(calib_dir, 'calib_cam_to_cam.txt'))
55 | velo2cam = read_calib_file(os.path.join(calib_dir, 'calib_velo_to_cam.txt'))
56 | velo2cam = np.hstack((velo2cam['R'].reshape(3, 3), velo2cam['T'][..., np.newaxis]))
57 | velo2cam = np.vstack((velo2cam, np.array([0, 0, 0, 1.0])))
58 |
59 | # get image shape
60 | im_shape = cam2cam["S_rect_02"][::-1].astype(np.int32)
61 |
62 | # compute projection matrix velodyne->image plane
63 | R_cam2rect = np.eye(4)
64 | R_cam2rect[:3, :3] = cam2cam['R_rect_00'].reshape(3, 3)
65 | P_rect = cam2cam['P_rect_0'+str(cam)].reshape(3, 4)
66 | P_velo2im = np.dot(np.dot(P_rect, R_cam2rect), velo2cam)
67 |
68 | # load velodyne points and remove all behind image plane (approximation)
69 | # each row of the velodyne data is forward, left, up, reflectance
70 | velo = load_velodyne_points(velo_filename)
71 | velo = velo[velo[:, 0] >= 0, :]
72 |
73 | # project the points to the camera
74 | velo_pts_im = np.dot(P_velo2im, velo.T).T
75 | velo_pts_im[:, :2] = velo_pts_im[:, :2] / velo_pts_im[:, 2][..., np.newaxis]
76 |
77 | if vel_depth:
78 | velo_pts_im[:, 2] = velo[:, 0]
79 |
80 | # check if in bounds
81 | # use minus 1 to get the exact same value as KITTI matlab code
82 | velo_pts_im[:, 0] = np.round(velo_pts_im[:, 0]) - 1
83 | velo_pts_im[:, 1] = np.round(velo_pts_im[:, 1]) - 1
84 | val_inds = (velo_pts_im[:, 0] >= 0) & (velo_pts_im[:, 1] >= 0)
85 | val_inds = val_inds & (velo_pts_im[:, 0] < im_shape[1]) & (velo_pts_im[:, 1] < im_shape[0])
86 | velo_pts_im = velo_pts_im[val_inds, :]
87 |
88 | # project to image
89 | depth = np.zeros((im_shape[:2]))
90 | depth[velo_pts_im[:, 1].astype(np.int), velo_pts_im[:, 0].astype(np.int)] = velo_pts_im[:, 2]
91 |
92 | # find the duplicate points and choose the closest depth
93 | inds = sub2ind(depth.shape, velo_pts_im[:, 1], velo_pts_im[:, 0])
94 | dupe_inds = [item for item, count in Counter(inds).items() if count > 1]
95 | for dd in dupe_inds:
96 | pts = np.where(inds == dd)[0]
97 | x_loc = int(velo_pts_im[pts[0], 0])
98 | y_loc = int(velo_pts_im[pts[0], 1])
99 | depth[y_loc, x_loc] = velo_pts_im[pts, 2].min()
100 | depth[depth < 0] = 0
101 |
102 | return depth
103 |
104 |
105 | def rotx(t):
106 | """Rotation about the x-axis."""
107 | c = np.cos(t)
108 | s = np.sin(t)
109 | return np.array([[1, 0, 0],
110 | [0, c, -s],
111 | [0, s, c]])
112 |
113 |
114 | def roty(t):
115 | """Rotation about the y-axis."""
116 | c = np.cos(t)
117 | s = np.sin(t)
118 | return np.array([[c, 0, s],
119 | [0, 1, 0],
120 | [-s, 0, c]])
121 |
122 |
123 | def rotz(t):
124 | """Rotation about the z-axis."""
125 | c = np.cos(t)
126 | s = np.sin(t)
127 | return np.array([[c, -s, 0],
128 | [s, c, 0],
129 | [0, 0, 1]])
130 |
131 |
132 | def pose_from_oxts_packet(metadata, scale):
133 |
134 | """Helper method to compute a SE(3) pose matrix from an OXTS packet.
135 | Taken from https://github.com/utiasSTARS/pykitti
136 | """
137 | lat, lon, alt, roll, pitch, yaw = metadata
138 |
139 | er = 6378137. # earth radius (approx.) in meters
140 | # Use a Mercator projection to get the translation vector
141 |
142 | tx = scale * lon * np.pi * er / 180.
143 | ty = scale * er * \
144 | np.log(np.tan((90. + lat) * np.pi / 360.))
145 | tz = alt
146 | t = np.array([tx, ty, tz]).reshape(-1,1)
147 |
148 | # Use the Euler angles to get the rotation matrix
149 | Rx = rotx(roll)
150 | Ry = roty(pitch)
151 | Rz = rotz(yaw)
152 | R = Rz.dot(Ry.dot(Rx))
153 | return transform_from_rot_trans(R, t)
154 |
155 |
156 | def transform_from_rot_trans(R, t):
157 | """Transforation matrix from rotation matrix and translation vector."""
158 | R = R.reshape(3, 3)
159 | t = t.reshape(3, 1)
160 | return np.vstack((np.hstack([R, t]), [0, 0, 0, 1]))
161 |
162 |
163 |
--------------------------------------------------------------------------------
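
A usage sketch for generate_depth_map under assumed local paths: the calibration directory is the KITTI date folder containing calib_cam_to_cam.txt and calib_velo_to_cam.txt, and the output is a sparse depth image at the rectified camera resolution:

from mono.datasets.kitti_utils import generate_depth_map

calib_dir = '/path/to/kitti_raw/2011_09_26'                       # placeholder
velo_file = ('/path/to/kitti_raw/2011_09_26/'
             '2011_09_26_drive_0001_sync/velodyne_points/data/0000000000.bin')

depth = generate_depth_map(calib_dir, velo_file, cam=2)
print(depth.shape)         # e.g. (375, 1242) for the 2011_09_26 drives
print((depth > 0).mean())  # only a sparse fraction of pixels carries lidar depth
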
/mono/datasets/loader/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai)
4 |
5 | from .build_loader import build_dataloader
--------------------------------------------------------------------------------
/mono/datasets/loader/build_loader.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai)
4 |
5 | from functools import partial
6 |
7 | from mmcv.runner import get_dist_info
8 | from mmcv.parallel import collate
9 | from torch.utils.data import DataLoader
10 | from .sampler import GroupSampler, DistributedGroupSampler, DistributedSampler
11 |
12 | # https://github.com/pytorch/pytorch/issues/973
13 | import resource
14 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
15 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1]))
16 |
17 |
18 | def build_dataloader(dataset,
19 | imgs_per_gpu,
20 | workers_per_gpu,
21 | num_gpus=1,
22 | dist=True,
23 | **kwargs):
24 | shuffle = kwargs.get('shuffle', True)
25 | if dist:
26 | rank, world_size = get_dist_info()
27 | if shuffle:
28 | sampler = DistributedGroupSampler(dataset,
29 | imgs_per_gpu,
30 | world_size,
31 | rank)
32 | else:
33 | sampler = DistributedSampler(dataset,
34 | world_size,
35 | rank,
36 | shuffle=False)
37 | batch_size = imgs_per_gpu
38 | num_workers = workers_per_gpu
39 | else:
40 | sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None
41 | batch_size = num_gpus * imgs_per_gpu
42 | num_workers = num_gpus * workers_per_gpu
43 |
44 | data_loader = DataLoader(dataset,
45 | batch_size=batch_size,
46 | sampler=sampler,
47 | num_workers=num_workers,
48 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu),
49 | pin_memory=False,
50 | **kwargs,
51 | drop_last=True
52 | )
53 |
54 | return data_loader
55 |
--------------------------------------------------------------------------------
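
A single-GPU, non-distributed sketch of the loader above. It assumes a dataset object such as train_set from the get_dataset sketch (every dataset class in this package sets the flag array that GroupSampler needs), and leaves shuffle at its default so the grouped sampler is used:

from mono.datasets.loader import build_dataloader

loader = build_dataloader(train_set,
                          imgs_per_gpu=4,
                          workers_per_gpu=4,
                          num_gpus=1,
                          dist=False)

for batch in loader:
    # batches are collated by mmcv.parallel.collate; the last incomplete batch is dropped
    break
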
/mono/datasets/loader/sampler.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai)
4 |
5 | from __future__ import division
6 |
7 | import math
8 | import torch
9 | import numpy as np
10 |
11 | from torch.distributed import get_world_size, get_rank
12 | from torch.utils.data import Sampler
13 | from torch.utils.data import DistributedSampler as _DistributedSampler
14 |
15 |
16 | class DistributedSampler(_DistributedSampler):
17 |
18 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
19 | super().__init__(dataset, num_replicas=num_replicas, rank=rank)
20 | self.shuffle = shuffle
21 |
22 | def __iter__(self):
23 | # deterministically shuffle based on epoch
24 | if self.shuffle:
25 | g = torch.Generator()
26 | g.manual_seed(self.epoch)
27 | indices = torch.randperm(len(self.dataset), generator=g).tolist()
28 | else:
29 | indices = torch.arange(len(self.dataset)).tolist()
30 |
31 | # add extra samples to make it evenly divisible
32 | indices += indices[:(self.total_size - len(indices))]
33 | assert len(indices) == self.total_size
34 |
35 | # subsample
36 | indices = indices[self.rank:self.total_size:self.num_replicas]
37 | assert len(indices) == self.num_samples
38 |
39 | return iter(indices)
40 |
41 |
42 | class GroupSampler(Sampler):
43 |
44 | def __init__(self, dataset, samples_per_gpu=1):
45 | assert hasattr(dataset, 'flag')
46 | self.dataset = dataset
47 | self.samples_per_gpu = samples_per_gpu
48 | self.flag = dataset.flag.astype(np.int64)
49 | self.group_sizes = np.bincount(self.flag)
50 | self.num_samples = 0
51 | for i, size in enumerate(self.group_sizes):
52 | self.num_samples += int(np.ceil(
53 | size / self.samples_per_gpu)) * self.samples_per_gpu
54 |
55 | def __iter__(self):
56 | indices = []
57 | for i, size in enumerate(self.group_sizes):
58 | if size == 0:
59 | continue
60 | indice = np.where(self.flag == i)[0]
61 | assert len(indice) == size
62 | np.random.shuffle(indice)
63 | num_extra = int(np.ceil(size / self.samples_per_gpu)
64 | ) * self.samples_per_gpu - len(indice)
65 | indice = np.concatenate([indice, indice[:num_extra]])
66 | indices.append(indice)
67 | indices = np.concatenate(indices)
68 | indices = [
69 | indices[i * self.samples_per_gpu:(i + 1) * self.samples_per_gpu]
70 | for i in np.random.permutation(
71 | range(len(indices) // self.samples_per_gpu))
72 | ]
73 | indices = np.concatenate(indices)
74 | indices = torch.from_numpy(indices).long()
75 | assert len(indices) == self.num_samples
76 | return iter(indices)
77 |
78 | def __len__(self):
79 | return self.num_samples
80 |
81 |
82 | class DistributedGroupSampler(Sampler):
83 | """Sampler that restricts data loading to a subset of the dataset.
84 | It is especially useful in conjunction with
85 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
86 | process can pass a DistributedSampler instance as a DataLoader sampler,
87 | and load a subset of the original dataset that is exclusive to it.
88 | .. note::
89 | Dataset is assumed to be of constant size.
90 | Arguments:
91 | dataset: Dataset used for sampling.
92 | num_replicas (optional): Number of processes participating in
93 | distributed training.
94 | rank (optional): Rank of the current process within num_replicas.
95 | """
96 |
97 | def __init__(self,
98 | dataset,
99 | samples_per_gpu=1,
100 | num_replicas=None,
101 | rank=None):
102 | if num_replicas is None:
103 | num_replicas = get_world_size()
104 | if rank is None:
105 | rank = get_rank()
106 | self.dataset = dataset
107 | self.samples_per_gpu = samples_per_gpu
108 | self.num_replicas = num_replicas
109 | self.rank = rank
110 | self.epoch = 0
111 |
112 | assert hasattr(self.dataset, 'flag')
113 | self.flag = self.dataset.flag
114 | self.group_sizes = np.bincount(self.flag)
115 |
116 | self.num_samples = 0
117 | for i, j in enumerate(self.group_sizes):
118 | self.num_samples += int(
119 | math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu /
120 | self.num_replicas)) * self.samples_per_gpu
121 | self.total_size = self.num_samples * self.num_replicas
122 |
123 | def __iter__(self):
124 | # deterministically shuffle based on epoch
125 | g = torch.Generator()
126 | g.manual_seed(self.epoch)
127 |
128 | indices = []
129 | for i, size in enumerate(self.group_sizes):
130 | if size > 0:
131 | indice = np.where(self.flag == i)[0]
132 | assert len(indice) == size
133 | indice = indice[list(torch.randperm(int(size),
134 | generator=g))].tolist()
135 | extra = int(
136 | math.ceil(
137 | size * 1.0 / self.samples_per_gpu / self.num_replicas)
138 | ) * self.samples_per_gpu * self.num_replicas - len(indice)
139 | indice += indice[:extra]
140 | indices += indice
141 |
142 | assert len(indices) == self.total_size
143 |
144 | indices = [
145 | indices[j] for i in list(
146 | torch.randperm(len(indices) // self.samples_per_gpu,
147 | generator=g))
148 | for j in range(i * self.samples_per_gpu, (i + 1) *
149 | self.samples_per_gpu)
150 | ]
151 |
152 | # subsample
153 | offset = self.num_samples * self.rank
154 | indices = indices[offset:offset + self.num_samples]
155 | assert len(indices) == self.num_samples
156 |
157 | return iter(indices)
158 |
159 | def __len__(self):
160 | return self.num_samples
161 |
162 | def set_epoch(self, epoch):
163 | self.epoch = epoch
164 |
--------------------------------------------------------------------------------
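
DistributedGroupSampler seeds its shuffle with self.epoch, so in a distributed run whatever owns the epoch loop must call set_epoch once per epoch, otherwise every epoch replays the same permutation. A bare-bones sketch (num_epochs is an assumed value, and loader is built with dist=True inside a launched torch.distributed job):

num_epochs = 20  # assumed value
for epoch in range(num_epochs):
    if hasattr(loader.sampler, 'set_epoch'):
        loader.sampler.set_epoch(epoch)
    for batch in loader:
        pass
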
/mono/datasets/splits/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai)
--------------------------------------------------------------------------------
/mono/datasets/splits/benchmark/eigen_to_benchmark_ids.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/datasets/splits/benchmark/eigen_to_benchmark_ids.npy
--------------------------------------------------------------------------------
/mono/datasets/splits/cityscape/gen_cityscape_split.py:
--------------------------------------------------------------------------------
1 | import zipfile
2 | import os
3 |
4 | def main():
5 | file = '/ssd/Cityscapes/leftImg8bit_sequence_trainvaltest.zip'
6 | archive = zipfile.ZipFile(file, 'r')
7 | namelist = sorted(archive.namelist())
8 |
9 | if os.path.exists(os.path.join('..', 'splits', 'cityscape')):
10 | print('path exists')
11 | else:
12 | os.makedirs(os.path.join('..', 'splits', 'cityscape'))
13 | with open(os.path.join('..', 'splits', 'cityscape', 'train.txt'), 'w') as trainfile:
14 | with open(os.path.join('..', 'splits', 'cityscape', 'val.txt'), 'w') as valfile:
15 | with open(os.path.join('..', 'splits', 'cityscape', 'test.txt'), 'w') as testfile:
16 | for i in range(len(namelist)):
17 | str = namelist[i]
18 | if 'png' in str:
19 | if 'train' in str:
20 | trainfile.write(str)
21 | trainfile.write('\n')
22 | elif 'val' in str:
23 | valfile.write(str)
24 | valfile.write('\n')
25 | elif 'test' in str:
26 | testfile.write(str)
27 | testfile.write('\n')
28 |
29 |
30 |
31 | if __name__ == '__main__':
32 | main()
--------------------------------------------------------------------------------
/mono/datasets/splits/cityscape/val_files.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/datasets/splits/cityscape/val_files.txt
--------------------------------------------------------------------------------
/mono/datasets/splits/exp/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai)
--------------------------------------------------------------------------------
/mono/datasets/splits/kitti_shot_sequence/gen_split.py:
--------------------------------------------------------------------------------
1 | if __name__ == "__main__":
2 |
3 | f = open('val_files.txt', 'w')
4 | for i in range(108):
5 | f.writelines(['2011_09_26/2011_09_26_drive_0001_sync ', str(i).zfill(10), ' l\n'])
6 |
7 | f.close()
8 | print('done')
--------------------------------------------------------------------------------
/mono/datasets/splits/kitti_shot_sequence/val_files.txt:
--------------------------------------------------------------------------------
1 | 2011_09_26/2011_09_26_drive_0001_sync 0000000000 l
2 | 2011_09_26/2011_09_26_drive_0001_sync 0000000001 l
3 | 2011_09_26/2011_09_26_drive_0001_sync 0000000002 l
4 | 2011_09_26/2011_09_26_drive_0001_sync 0000000003 l
5 | 2011_09_26/2011_09_26_drive_0001_sync 0000000004 l
6 | 2011_09_26/2011_09_26_drive_0001_sync 0000000005 l
7 | 2011_09_26/2011_09_26_drive_0001_sync 0000000006 l
8 | 2011_09_26/2011_09_26_drive_0001_sync 0000000007 l
9 | 2011_09_26/2011_09_26_drive_0001_sync 0000000008 l
10 | 2011_09_26/2011_09_26_drive_0001_sync 0000000009 l
11 | 2011_09_26/2011_09_26_drive_0001_sync 0000000010 l
12 | 2011_09_26/2011_09_26_drive_0001_sync 0000000011 l
13 | 2011_09_26/2011_09_26_drive_0001_sync 0000000012 l
14 | 2011_09_26/2011_09_26_drive_0001_sync 0000000013 l
15 | 2011_09_26/2011_09_26_drive_0001_sync 0000000014 l
16 | 2011_09_26/2011_09_26_drive_0001_sync 0000000015 l
17 | 2011_09_26/2011_09_26_drive_0001_sync 0000000016 l
18 | 2011_09_26/2011_09_26_drive_0001_sync 0000000017 l
19 | 2011_09_26/2011_09_26_drive_0001_sync 0000000018 l
20 | 2011_09_26/2011_09_26_drive_0001_sync 0000000019 l
21 | 2011_09_26/2011_09_26_drive_0001_sync 0000000020 l
22 | 2011_09_26/2011_09_26_drive_0001_sync 0000000021 l
23 | 2011_09_26/2011_09_26_drive_0001_sync 0000000022 l
24 | 2011_09_26/2011_09_26_drive_0001_sync 0000000023 l
25 | 2011_09_26/2011_09_26_drive_0001_sync 0000000024 l
26 | 2011_09_26/2011_09_26_drive_0001_sync 0000000025 l
27 | 2011_09_26/2011_09_26_drive_0001_sync 0000000026 l
28 | 2011_09_26/2011_09_26_drive_0001_sync 0000000027 l
29 | 2011_09_26/2011_09_26_drive_0001_sync 0000000028 l
30 | 2011_09_26/2011_09_26_drive_0001_sync 0000000029 l
31 | 2011_09_26/2011_09_26_drive_0001_sync 0000000030 l
32 | 2011_09_26/2011_09_26_drive_0001_sync 0000000031 l
33 | 2011_09_26/2011_09_26_drive_0001_sync 0000000032 l
34 | 2011_09_26/2011_09_26_drive_0001_sync 0000000033 l
35 | 2011_09_26/2011_09_26_drive_0001_sync 0000000034 l
36 | 2011_09_26/2011_09_26_drive_0001_sync 0000000035 l
37 | 2011_09_26/2011_09_26_drive_0001_sync 0000000036 l
38 | 2011_09_26/2011_09_26_drive_0001_sync 0000000037 l
39 | 2011_09_26/2011_09_26_drive_0001_sync 0000000038 l
40 | 2011_09_26/2011_09_26_drive_0001_sync 0000000039 l
41 | 2011_09_26/2011_09_26_drive_0001_sync 0000000040 l
42 | 2011_09_26/2011_09_26_drive_0001_sync 0000000041 l
43 | 2011_09_26/2011_09_26_drive_0001_sync 0000000042 l
44 | 2011_09_26/2011_09_26_drive_0001_sync 0000000043 l
45 | 2011_09_26/2011_09_26_drive_0001_sync 0000000044 l
46 | 2011_09_26/2011_09_26_drive_0001_sync 0000000045 l
47 | 2011_09_26/2011_09_26_drive_0001_sync 0000000046 l
48 | 2011_09_26/2011_09_26_drive_0001_sync 0000000047 l
49 | 2011_09_26/2011_09_26_drive_0001_sync 0000000048 l
50 | 2011_09_26/2011_09_26_drive_0001_sync 0000000049 l
51 | 2011_09_26/2011_09_26_drive_0001_sync 0000000050 l
52 | 2011_09_26/2011_09_26_drive_0001_sync 0000000051 l
53 | 2011_09_26/2011_09_26_drive_0001_sync 0000000052 l
54 | 2011_09_26/2011_09_26_drive_0001_sync 0000000053 l
55 | 2011_09_26/2011_09_26_drive_0001_sync 0000000054 l
56 | 2011_09_26/2011_09_26_drive_0001_sync 0000000055 l
57 | 2011_09_26/2011_09_26_drive_0001_sync 0000000056 l
58 | 2011_09_26/2011_09_26_drive_0001_sync 0000000057 l
59 | 2011_09_26/2011_09_26_drive_0001_sync 0000000058 l
60 | 2011_09_26/2011_09_26_drive_0001_sync 0000000059 l
61 | 2011_09_26/2011_09_26_drive_0001_sync 0000000060 l
62 | 2011_09_26/2011_09_26_drive_0001_sync 0000000061 l
63 | 2011_09_26/2011_09_26_drive_0001_sync 0000000062 l
64 | 2011_09_26/2011_09_26_drive_0001_sync 0000000063 l
65 | 2011_09_26/2011_09_26_drive_0001_sync 0000000064 l
66 | 2011_09_26/2011_09_26_drive_0001_sync 0000000065 l
67 | 2011_09_26/2011_09_26_drive_0001_sync 0000000066 l
68 | 2011_09_26/2011_09_26_drive_0001_sync 0000000067 l
69 | 2011_09_26/2011_09_26_drive_0001_sync 0000000068 l
70 | 2011_09_26/2011_09_26_drive_0001_sync 0000000069 l
71 | 2011_09_26/2011_09_26_drive_0001_sync 0000000070 l
72 | 2011_09_26/2011_09_26_drive_0001_sync 0000000071 l
73 | 2011_09_26/2011_09_26_drive_0001_sync 0000000072 l
74 | 2011_09_26/2011_09_26_drive_0001_sync 0000000073 l
75 | 2011_09_26/2011_09_26_drive_0001_sync 0000000074 l
76 | 2011_09_26/2011_09_26_drive_0001_sync 0000000075 l
77 | 2011_09_26/2011_09_26_drive_0001_sync 0000000076 l
78 | 2011_09_26/2011_09_26_drive_0001_sync 0000000077 l
79 | 2011_09_26/2011_09_26_drive_0001_sync 0000000078 l
80 | 2011_09_26/2011_09_26_drive_0001_sync 0000000079 l
81 | 2011_09_26/2011_09_26_drive_0001_sync 0000000080 l
82 | 2011_09_26/2011_09_26_drive_0001_sync 0000000081 l
83 | 2011_09_26/2011_09_26_drive_0001_sync 0000000082 l
84 | 2011_09_26/2011_09_26_drive_0001_sync 0000000083 l
85 | 2011_09_26/2011_09_26_drive_0001_sync 0000000084 l
86 | 2011_09_26/2011_09_26_drive_0001_sync 0000000085 l
87 | 2011_09_26/2011_09_26_drive_0001_sync 0000000086 l
88 | 2011_09_26/2011_09_26_drive_0001_sync 0000000087 l
89 | 2011_09_26/2011_09_26_drive_0001_sync 0000000088 l
90 | 2011_09_26/2011_09_26_drive_0001_sync 0000000089 l
91 | 2011_09_26/2011_09_26_drive_0001_sync 0000000090 l
92 | 2011_09_26/2011_09_26_drive_0001_sync 0000000091 l
93 | 2011_09_26/2011_09_26_drive_0001_sync 0000000092 l
94 | 2011_09_26/2011_09_26_drive_0001_sync 0000000093 l
95 | 2011_09_26/2011_09_26_drive_0001_sync 0000000094 l
96 | 2011_09_26/2011_09_26_drive_0001_sync 0000000095 l
97 | 2011_09_26/2011_09_26_drive_0001_sync 0000000096 l
98 | 2011_09_26/2011_09_26_drive_0001_sync 0000000097 l
99 | 2011_09_26/2011_09_26_drive_0001_sync 0000000098 l
100 | 2011_09_26/2011_09_26_drive_0001_sync 0000000099 l
101 | 2011_09_26/2011_09_26_drive_0001_sync 0000000100 l
102 | 2011_09_26/2011_09_26_drive_0001_sync 0000000101 l
103 | 2011_09_26/2011_09_26_drive_0001_sync 0000000102 l
104 | 2011_09_26/2011_09_26_drive_0001_sync 0000000103 l
105 | 2011_09_26/2011_09_26_drive_0001_sync 0000000104 l
106 | 2011_09_26/2011_09_26_drive_0001_sync 0000000105 l
107 | 2011_09_26/2011_09_26_drive_0001_sync 0000000106 l
108 | 2011_09_26/2011_09_26_drive_0001_sync 0000000107 l
109 |
--------------------------------------------------------------------------------
/mono/datasets/splits/short/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai)
--------------------------------------------------------------------------------
/mono/datasets/splits/short/train_files.txt:
--------------------------------------------------------------------------------
1 | 2011_09_26/2011_09_26_drive_0022_sync 473 r
2 | 2011_09_29/2011_09_29_drive_0026_sync 1 l
3 | 2011_09_26/2011_09_26_drive_0087_sync 185 r
4 | 2011_09_30/2011_09_30_drive_0028_sync 497 l
5 | 2011_10_03/2011_10_03_drive_0034_sync 215 l
6 | 2011_10_03/2011_10_03_drive_0042_sync 514 r
7 | 2011_09_30/2011_09_30_drive_0028_sync 2975 l
8 | 2011_10_03/2011_10_03_drive_0034_sync 1214 r
9 | 2011_09_26/2011_09_26_drive_0061_sync 601 l
10 | 2011_09_30/2011_09_30_drive_0028_sync 1924 l
11 | 2011_09_26/2011_09_26_drive_0091_sync 270 r
12 | 2011_09_30/2011_09_30_drive_0033_sync 979 r
13 | 2011_09_29/2011_09_29_drive_0004_sync 288 l
14 | 2011_09_30/2011_09_30_drive_0033_sync 1029 r
15 | 2011_09_30/2011_09_30_drive_0028_sync 5004 r
16 | 2011_09_26/2011_09_26_drive_0051_sync 138 r
17 | 2011_10_03/2011_10_03_drive_0034_sync 3247 l
18 | 2011_09_26/2011_09_26_drive_0014_sync 285 l
19 | 2011_09_30/2011_09_30_drive_0028_sync 573 l
20 | 2011_09_26/2011_09_26_drive_0051_sync 425 r
21 | 2011_09_30/2011_09_30_drive_0028_sync 2380 l
22 | 2011_09_30/2011_09_30_drive_0028_sync 1323 l
23 | 2011_09_30/2011_09_30_drive_0028_sync 733 r
24 | 2011_09_26/2011_09_26_drive_0087_sync 331 r
25 | 2011_09_30/2011_09_30_drive_0028_sync 536 l
26 | 2011_09_30/2011_09_30_drive_0028_sync 2935 r
27 | 2011_10_03/2011_10_03_drive_0034_sync 562 l
28 | 2011_09_26/2011_09_26_drive_0032_sync 226 r
29 | 2011_09_30/2011_09_30_drive_0028_sync 5148 r
30 | 2011_10_03/2011_10_03_drive_0034_sync 1355 r
31 | 2011_10_03/2011_10_03_drive_0034_sync 2695 l
32 | 2011_09_30/2011_09_30_drive_0028_sync 3546 r
33 | 2011_10_03/2011_10_03_drive_0034_sync 4023 r
34 | 2011_09_26/2011_09_26_drive_0051_sync 301 r
35 | 2011_09_30/2011_09_30_drive_0028_sync 402 r
36 | 2011_09_30/2011_09_30_drive_0033_sync 294 r
37 | 2011_09_30/2011_09_30_drive_0033_sync 1106 r
38 | 2011_09_30/2011_09_30_drive_0028_sync 4906 r
39 | 2011_10_03/2011_10_03_drive_0034_sync 1504 l
40 | 2011_10_03/2011_10_03_drive_0042_sync 478 r
41 | 2011_09_30/2011_09_30_drive_0033_sync 980 l
42 | 2011_09_30/2011_09_30_drive_0028_sync 684 l
43 | 2011_09_30/2011_09_30_drive_0028_sync 3418 r
44 | 2011_09_26/2011_09_26_drive_0028_sync 68 r
45 | 2011_09_26/2011_09_26_drive_0039_sync 245 l
46 | 2011_09_26/2011_09_26_drive_0087_sync 363 l
47 | 2011_10_03/2011_10_03_drive_0034_sync 655 r
48 | 2011_10_03/2011_10_03_drive_0034_sync 3379 l
49 | 2011_10_03/2011_10_03_drive_0034_sync 684 r
50 | 2011_09_26/2011_09_26_drive_0018_sync 100 l
51 | 2011_09_26/2011_09_26_drive_0104_sync 163 l
52 | 2011_10_03/2011_10_03_drive_0034_sync 2587 l
53 | 2011_09_30/2011_09_30_drive_0028_sync 663 l
54 | 2011_09_30/2011_09_30_drive_0033_sync 273 r
55 | 2011_10_03/2011_10_03_drive_0042_sync 768 r
56 | 2011_09_30/2011_09_30_drive_0033_sync 1543 l
57 | 2011_10_03/2011_10_03_drive_0034_sync 4614 r
58 | 2011_10_03/2011_10_03_drive_0034_sync 475 l
59 | 2011_09_30/2011_09_30_drive_0028_sync 3297 l
60 | 2011_09_26/2011_09_26_drive_0039_sync 165 l
61 | 2011_09_30/2011_09_30_drive_0028_sync 1031 l
62 | 2011_10_03/2011_10_03_drive_0034_sync 2656 l
63 | 2011_10_03/2011_10_03_drive_0042_sync 66 r
64 | 2011_10_03/2011_10_03_drive_0042_sync 297 r
65 | 2011_09_30/2011_09_30_drive_0028_sync 2604 l
66 | 2011_09_26/2011_09_26_drive_0104_sync 97 r
67 | 2011_10_03/2011_10_03_drive_0034_sync 3787 l
68 | 2011_09_30/2011_09_30_drive_0028_sync 2946 l
69 | 2011_10_03/2011_10_03_drive_0034_sync 1184 l
70 | 2011_10_03/2011_10_03_drive_0042_sync 458 l
71 | 2011_09_30/2011_09_30_drive_0028_sync 4166 r
72 | 2011_09_30/2011_09_30_drive_0028_sync 4922 l
73 | 2011_09_30/2011_09_30_drive_0033_sync 1323 l
74 | 2011_10_03/2011_10_03_drive_0034_sync 1819 l
75 | 2011_10_03/2011_10_03_drive_0042_sync 569 l
76 | 2011_09_26/2011_09_26_drive_0070_sync 205 l
77 | 2011_10_03/2011_10_03_drive_0042_sync 249 l
78 | 2011_09_30/2011_09_30_drive_0034_sync 371 r
79 | 2011_09_26/2011_09_26_drive_0039_sync 104 l
80 | 2011_09_30/2011_09_30_drive_0028_sync 382 r
81 | 2011_09_26/2011_09_26_drive_0087_sync 295 l
82 | 2011_09_30/2011_09_30_drive_0028_sync 3023 l
83 | 2011_10_03/2011_10_03_drive_0042_sync 591 l
84 | 2011_10_03/2011_10_03_drive_0034_sync 1472 l
85 | 2011_09_26/2011_09_26_drive_0001_sync 77 r
86 | 2011_10_03/2011_10_03_drive_0034_sync 3269 l
87 | 2011_09_30/2011_09_30_drive_0020_sync 185 r
88 | 2011_10_03/2011_10_03_drive_0034_sync 2437 r
89 | 2011_10_03/2011_10_03_drive_0034_sync 4050 l
90 | 2011_09_26/2011_09_26_drive_0039_sync 147 r
91 | 2011_09_30/2011_09_30_drive_0028_sync 4741 l
92 | 2011_09_30/2011_09_30_drive_0028_sync 3557 r
93 | 2011_10_03/2011_10_03_drive_0034_sync 394 l
94 | 2011_09_30/2011_09_30_drive_0028_sync 158 r
95 | 2011_10_03/2011_10_03_drive_0034_sync 1804 l
96 | 2011_09_29/2011_09_29_drive_0004_sync 62 r
97 | 2011_09_30/2011_09_30_drive_0028_sync 220 l
98 | 2011_10_03/2011_10_03_drive_0034_sync 1420 r
99 | 2011_10_03/2011_10_03_drive_0034_sync 2310 l
100 | 2011_09_30/2011_09_30_drive_0034_sync 839 r
--------------------------------------------------------------------------------
/mono/datasets/utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai)
4 |
5 | from __future__ import absolute_import, division, print_function
6 | import torch
7 | import numpy as np
8 | import cv2
9 |
10 |
11 | def readlines(filename):
12 | """Read all the lines in a text file and return as a list
13 | """
14 | with open(filename, 'r') as f:
15 | lines = f.read().splitlines()
16 | return lines
17 |
18 |
19 | def normalize_image(x):
20 | """Rescale image pixels to span range [0, 1]
21 | """
22 | ma = float(x.max().cpu().data)
23 | mi = float(x.min().cpu().data)
24 | d = ma - mi if ma != mi else 1e5
25 | return (x - mi) / d
26 |
27 |
28 | def sec_to_hm(t):
29 | """Convert time in seconds to time in hours, minutes and seconds
30 | e.g. 10239 -> (2, 50, 39)
31 | """
32 | t = int(t)
33 | s = t % 60
34 | t //= 60
35 | m = t % 60
36 | t //= 60
37 | return t, m, s
38 |
39 |
40 | def sec_to_hm_str(t):
41 | """Convert time in seconds to a nice string
42 | e.g. 10239 -> '02h50m39s'
43 | """
44 | h, m, s = sec_to_hm(t)
45 | return "{:02d}h{:02d}m{:02d}s".format(h, m, s)
46 |
47 |
48 | def transformation_from_parameters(axisangle, translation, invert=False):
49 | R = rot_from_axisangle(axisangle)
50 | t = translation.clone()
51 | if invert:
52 | R = R.transpose(1, 2)
53 | t *= -1
54 | T = get_translation_matrix(t)
55 | if invert:
56 | M = torch.matmul(R, T)
57 | else:
58 | M = torch.matmul(T, R)
59 | return M
60 |
61 |
62 | def get_translation_matrix(translation_vector):
63 | T = torch.zeros(translation_vector.shape[0], 4, 4).cuda()
64 | t = translation_vector.contiguous().view(-1, 3, 1)
65 | T[:, 0, 0] = 1
66 | T[:, 1, 1] = 1
67 | T[:, 2, 2] = 1
68 | T[:, 3, 3] = 1
69 | T[:, :3, 3, None] = t
70 | return T
71 |
72 |
73 | def rot_from_axisangle(vec):
74 | angle = torch.norm(vec, 2, 2, True)
75 | axis = vec / (angle + 1e-7)
76 | ca = torch.cos(angle)
77 | sa = torch.sin(angle)
78 | C = 1 - ca
79 | x = axis[..., 0].unsqueeze(1)
80 | y = axis[..., 1].unsqueeze(1)
81 | z = axis[..., 2].unsqueeze(1)
82 | xs = x * sa
83 | ys = y * sa
84 | zs = z * sa
85 | xC = x * C
86 | yC = y * C
87 | zC = z * C
88 | xyC = x * yC
89 | yzC = y * zC
90 | zxC = z * xC
91 | rot = torch.zeros((vec.shape[0], 4, 4)).cuda()
92 | rot[:, 0, 0] = torch.squeeze(x * xC + ca)
93 | rot[:, 0, 1] = torch.squeeze(xyC - zs)
94 | rot[:, 0, 2] = torch.squeeze(zxC + ys)
95 | rot[:, 1, 0] = torch.squeeze(xyC + zs)
96 | rot[:, 1, 1] = torch.squeeze(y * yC + ca)
97 | rot[:, 1, 2] = torch.squeeze(yzC - xs)
98 | rot[:, 2, 0] = torch.squeeze(zxC - ys)
99 | rot[:, 2, 1] = torch.squeeze(yzC + xs)
100 | rot[:, 2, 2] = torch.squeeze(z * zC + ca)
101 | rot[:, 3, 3] = 1
102 | return rot
103 |
104 |
105 | def dump_xyz(source_to_target_transformations):
106 | xyzs = []
107 | cam_to_world = np.eye(4)
108 | xyzs.append(cam_to_world[:3, 3])
109 | for source_to_target_transformation in source_to_target_transformations:
110 | cam_to_world = np.dot(cam_to_world, source_to_target_transformation)
111 | xyzs.append(cam_to_world[:3, 3])
112 | return xyzs
113 |
114 |
115 | def compute_ate(gtruth_xyz, pred_xyz_o):
116 | offset = gtruth_xyz[0] - pred_xyz_o[0]
117 | pred_xyz = pred_xyz_o + offset[None, :]
118 |
119 | scale = np.sum(gtruth_xyz * pred_xyz) / np.sum(pred_xyz ** 2)
120 | alignment_error = pred_xyz * scale - gtruth_xyz
121 | rmse = np.sqrt(np.sum(alignment_error ** 2)) / gtruth_xyz.shape[0]
122 | return rmse
123 |
124 |
125 | def extract_match(queryImage, trainImage, num):
126 | orb = cv2.ORB_create()
127 | kp_query, des_query = orb.detectAndCompute(queryImage, None)
128 | kp_train, des_train = orb.detectAndCompute(trainImage, None)
129 | bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
130 | matches = bf.match(des_query, des_train)
131 | matches = sorted(matches, key=lambda x: x.distance)
132 | query_position = []
133 | train_position = []
134 | for i in range(num):
135 | match = matches[i]
136 | queryIdx = match.queryIdx
137 | trainIdx = match.trainIdx
138 | query_position.append(kp_query[queryIdx].pt)
139 | train_position.append(kp_train[trainIdx].pt)
140 | return query_position, train_position
141 |
142 |
143 | def compute_errors(gt, pred):
144 | """Computation of error metrics between predicted and ground truth depths
145 | """
146 | thresh = np.maximum((gt / pred), (pred / gt))
147 | a1 = (thresh < 1.25 ).mean()
148 | a2 = (thresh < 1.25 ** 2).mean()
149 | a3 = (thresh < 1.25 ** 3).mean()
150 |
151 | rmse = (gt - pred) ** 2
152 | rmse = np.sqrt(rmse.mean())
153 |
154 | rmse_log = (np.log(gt) - np.log(pred)) ** 2
155 | rmse_log = np.sqrt(rmse_log.mean())
156 |
157 | abs_rel = np.mean(np.abs(gt - pred) / gt)
158 |
159 | sq_rel = np.mean(((gt - pred) ** 2) / gt)
160 |
161 | return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3
162 |
163 |
164 | def batch_post_process_disparity(l_disp, r_disp):
165 | """Apply the disparity post-processing method as introduced in Monodepthv1
166 | """
167 | _, h, w = l_disp.shape
168 | m_disp = 0.5 * (l_disp + r_disp)
169 | l, _ = np.meshgrid(np.linspace(0, 1, w), np.linspace(0, 1, h))
170 | l_mask = (1.0 - np.clip(20 * (l - 0.05), 0, 1))[None, ...]
171 | r_mask = l_mask[:, :, ::-1]
172 | return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp
--------------------------------------------------------------------------------
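
A toy check of compute_errors from utils.py on made-up depths (both arrays in metres): with every prediction off by roughly 10%, abs_rel lands at 0.1 and all three threshold accuracies at 1.0:

import numpy as np
from mono.datasets.utils import compute_errors

gt = np.array([10.0, 20.0, 30.0])
pred = gt * np.array([1.1, 0.9, 1.1])    # each prediction ~10% off
abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 = compute_errors(gt, pred)
print(round(float(abs_rel), 3), float(a1))   # 0.1 1.0
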
/mono/model/__init__.py:
--------------------------------------------------------------------------------
1 | from .mono_baseline.net import Baseline
2 | from .mono_autoencoder.net import autoencoder
3 | from .mono_fm.net import mono_fm
4 | from .mono_fm_joint.net import mono_fm_joint
--------------------------------------------------------------------------------
/mono/model/mono_autoencoder/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/model/mono_autoencoder/__init__.py
--------------------------------------------------------------------------------
/mono/model/mono_autoencoder/decoder.py:
--------------------------------------------------------------------------------
1 | # import torch
2 | # import torch.nn as nn
3 | # import torch.nn.functional as F
4 | # from .layers import Conv1x1, Conv3x3, CRPBlock, upsample
5 | #
6 | #
7 | # class Decoder(nn.Module):
8 | # def __init__(self, num_ch_enc):
9 | # super(Decoder, self).__init__()
10 | #
11 | # bottleneck = 256
12 | # stage = 4
13 | # self.do = nn.Dropout(p=0.5)
14 | #
15 | # self.reduce4 = Conv1x1(num_ch_enc[4], 512, bias=False)
16 | # self.reduce3 = Conv1x1(num_ch_enc[3], bottleneck, bias=False)
17 | # self.reduce2 = Conv1x1(num_ch_enc[2], bottleneck, bias=False)
18 | # self.reduce1 = Conv1x1(num_ch_enc[1], bottleneck, bias=False)
19 | #
20 | # self.iconv4 = Conv3x3(512, bottleneck)
21 | # self.iconv3 = Conv3x3(bottleneck, bottleneck)
22 | # self.iconv2 = Conv3x3(bottleneck, bottleneck)
23 | # self.iconv1 = Conv3x3(bottleneck, bottleneck)
24 | #
25 | # self.crp4 = self._make_crp(bottleneck, bottleneck, stage)
26 | # self.crp3 = self._make_crp(bottleneck, bottleneck, stage)
27 | # self.crp2 = self._make_crp(bottleneck, bottleneck, stage)
28 | # self.crp1 = self._make_crp(bottleneck, bottleneck, stage)
29 | #
30 | # self.merge4 = Conv3x3(bottleneck, bottleneck)
31 | # self.merge3 = Conv3x3(bottleneck, bottleneck)
32 | # self.merge2 = Conv3x3(bottleneck, bottleneck)
33 | # self.merge1 = Conv3x3(bottleneck, bottleneck)
34 | #
35 | # # disp
36 | # self.disp4 = nn.Sequential(Conv3x3(bottleneck, 3), nn.Sigmoid())
37 | # self.disp3 = nn.Sequential(Conv3x3(bottleneck, 3), nn.Sigmoid())
38 | # self.disp2 = nn.Sequential(Conv3x3(bottleneck, 3), nn.Sigmoid())
39 | # self.disp1 = nn.Sequential(Conv3x3(bottleneck, 3), nn.Sigmoid())
40 | #
41 | # def _make_crp(self, in_planes, out_planes, stages):
42 | # layers = [CRPBlock(in_planes, out_planes,stages)]
43 | # return nn.Sequential(*layers)
44 | #
45 | # def forward(self, input_features, frame_id):
46 | # self.outputs = {}
47 | # l0, l1, l2, l3, l4 = input_features
48 | #
49 | # x4 = self.reduce4(l4)
50 | # x4 = self.iconv4(x4)
51 | # x4 = F.leaky_relu(x4)
52 | # x4 = self.crp4(x4)
53 | # x4 = self.merge4(x4)
54 | # x4 = F.leaky_relu(x4)
55 | # x4 = upsample(x4)
56 | # disp4 = self.disp4(x4)
57 | #
58 | #
59 | # x3 = self.reduce3(x4)
60 | # x3 = self.iconv3(x3)
61 | # x3 = F.leaky_relu(x3)
62 | # x3 = self.crp3(x3)
63 | # x3 = self.merge3(x3)
64 | # x3 = F.leaky_relu(x3)
65 | # x3 = upsample(x3)
66 | # disp3 = self.disp3(x3)
67 | #
68 | #
69 | # x2 = self.reduce2(l2)
70 | # x2 = torch.cat((x2), 1)
71 | # x2 = self.iconv2(x2)
72 | # x2 = F.leaky_relu(x2)
73 | # x2 = self.crp2(x2)
74 | # x2 = self.merge2(x2)
75 | # x2 = F.leaky_relu(x2)
76 | # x2 = upsample(x2)
77 | # disp2 = self.disp2(x2)
78 | #
79 | # x1 = self.reduce1(l1)
80 | # x1 = torch.cat((x1), 1)
81 | # x1 = self.iconv1(x1)
82 | # x1 = F.leaky_relu(x1)
83 | # x1 = self.crp1(x1)
84 | # x1 = self.merge1(x1)
85 | # x1 = F.leaky_relu(x1)
86 | # x1 = upsample(x1)
87 | # disp1 = self.disp1(x1)
88 | #
89 | # self.outputs[("disp", frame_id, 3)] = disp4
90 | # self.outputs[("disp", frame_id, 2)] = disp3
91 | # self.outputs[("disp", frame_id, 1)] = disp2
92 | # self.outputs[("disp", frame_id, 0)] = disp1
93 | #
94 | # return self.outputs
95 |
96 |
97 | from __future__ import absolute_import, division, print_function
98 | import torch.nn as nn
99 | from .layers import ConvBlock, Conv3x3, upsample
100 |
101 |
102 | class Decoder(nn.Module):
103 | def __init__(self, num_ch_enc, num_output_channels=3):
104 | super(Decoder, self).__init__()
105 |
106 | num_ch_dec = [16, 32, 64, 128, 256]
107 |
108 | # upconv
109 | self.upconv5 = ConvBlock(num_ch_enc[4], num_ch_dec[4])
110 | self.upconv4 = ConvBlock(num_ch_dec[4], num_ch_dec[3])
111 | self.upconv3 = ConvBlock(num_ch_dec[3], num_ch_dec[2])
112 | self.upconv2 = ConvBlock(num_ch_dec[2], num_ch_dec[1])
113 | self.upconv1 = ConvBlock(num_ch_dec[1], num_ch_dec[0])
114 |
115 | # iconv
116 | self.iconv5 = ConvBlock(num_ch_dec[4], num_ch_dec[4])
117 | self.iconv4 = ConvBlock(num_ch_dec[3], num_ch_dec[3])
118 | self.iconv3 = ConvBlock(num_ch_dec[2], num_ch_dec[2])
119 | self.iconv2 = ConvBlock(num_ch_dec[1], num_ch_dec[1])
120 | self.iconv1 = ConvBlock(num_ch_dec[0], num_ch_dec[0])
121 |
122 | # disp
123 | self.disp4 = Conv3x3(num_ch_dec[3], num_output_channels)
124 | self.disp3 = Conv3x3(num_ch_dec[2], num_output_channels)
125 | self.disp2 = Conv3x3(num_ch_dec[1], num_output_channels)
126 | self.disp1 = Conv3x3(num_ch_dec[0], num_output_channels)
127 |
128 | self.sigmoid = nn.Sigmoid()
129 |
130 |
131 | def forward(self, input_features, frame_id=0):
132 | self.outputs = {}
133 | _, _, _, _, econv5 = input_features
134 | # (64,64,128,256,512)*4
135 |
136 | upconv5 = upsample(self.upconv5(econv5))
137 | iconv5 = self.iconv5(upconv5)
138 |
139 | upconv4 = upsample(self.upconv4(iconv5))
140 | iconv4 = self.iconv4(upconv4)
141 |
142 | upconv3 = upsample(self.upconv3(iconv4))
143 | iconv3 = self.iconv3(upconv3)
144 |
145 | upconv2 = upsample(self.upconv2(iconv3))
146 | iconv2 = self.iconv2(upconv2)
147 |
148 | upconv1 = upsample(self.upconv1(iconv2))
149 | iconv1 = self.iconv1(upconv1)
150 |
151 | self.outputs[("disp", frame_id, 3)] = self.sigmoid(self.disp4(iconv4))
152 | self.outputs[("disp", frame_id, 2)] = self.sigmoid(self.disp3(iconv3))
153 | self.outputs[("disp", frame_id, 1)] = self.sigmoid(self.disp2(iconv2))
154 | self.outputs[("disp", frame_id, 0)] = self.sigmoid(self.disp1(iconv1))
155 | return self.outputs
--------------------------------------------------------------------------------
/mono/model/mono_autoencoder/encoder.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | import numpy as np
3 | import torch
4 | import torch.nn as nn
5 | from .resnet import resnet18, resnet34, resnet50, resnet101
6 |
7 |
8 | class Encoder(nn.Module):
9 | def __init__(self, num_layers, pretrained_path=None):
10 | super(Encoder, self).__init__()
11 |
12 | self.num_ch_enc = np.array([64, 64, 128, 256, 512])
13 |
14 | resnets = {18: resnet18,
15 | 34: resnet34,
16 | 50: resnet50,
17 | 101: resnet101,}
18 |
19 | if num_layers not in resnets:
20 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers))
21 |
22 |
23 | self.encoder = resnets[num_layers]()
24 | if pretrained_path is not None:
25 | checkpoint = torch.load(pretrained_path)
26 | self.encoder.load_state_dict(checkpoint)
27 |
28 | if num_layers > 34:
29 | self.num_ch_enc[1:] *= 4
30 |
31 | # for name, param in self.encoder.named_parameters():
32 | # if 'bn' in name:
33 | # param.requires_grad = False
34 |
35 | def forward(self, input_image):
36 | self.features = []
37 | self.features.append(self.encoder.relu(self.encoder.bn1(self.encoder.conv1(input_image))))
38 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1])))
39 | self.features.append(self.encoder.layer2(self.features[-1]))
40 | self.features.append(self.encoder.layer3(self.features[-1]))
41 | self.features.append(self.encoder.layer4(self.features[-1]))
42 |
43 | return self.features
44 |
--------------------------------------------------------------------------------
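
A shape sketch wiring the autoencoder Encoder and Decoder together with randomly initialised weights (no pretrained_path). The decoder returns a dict keyed ("disp", frame_id, scale); with the default num_output_channels=3 the scale-0 entry is a full-resolution, 3-channel reconstruction:

import torch
from mono.model.mono_autoencoder.encoder import Encoder
from mono.model.mono_autoencoder.decoder import Decoder

enc = Encoder(num_layers=18, pretrained_path=None)   # ResNet-18 backbone, random init
dec = Decoder(enc.num_ch_enc)                        # num_ch_enc = [64, 64, 128, 256, 512]

x = torch.randn(1, 3, 64, 64)                        # any size divisible by 32 works
out = dec(enc(x), frame_id=0)
print(out[("disp", 0, 0)].shape)   # torch.Size([1, 3, 64, 64])  finest scale
print(out[("disp", 0, 3)].shape)   # torch.Size([1, 3, 8, 8])    coarsest scale
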
/mono/model/mono_autoencoder/layers.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 |
7 |
8 | class SSIM(nn.Module):
9 | def __init__(self):
10 | super(SSIM, self).__init__()
11 | self.mu_x_pool = nn.AvgPool2d(3, 1)
12 | self.mu_y_pool = nn.AvgPool2d(3, 1)
13 | self.sig_x_pool = nn.AvgPool2d(3, 1)
14 | self.sig_y_pool = nn.AvgPool2d(3, 1)
15 | self.sig_xy_pool = nn.AvgPool2d(3, 1)
16 | self.refl = nn.ReflectionPad2d(1)
17 | self.C1 = 0.01 ** 2
18 | self.C2 = 0.03 ** 2
19 |
20 | def forward(self, x, y):
21 | x = self.refl(x)
22 | y = self.refl(y)
23 | mu_x = self.mu_x_pool(x)
24 | mu_y = self.mu_y_pool(y)
25 | sigma_x = self.sig_x_pool(x ** 2) - mu_x ** 2
26 | sigma_y = self.sig_y_pool(y ** 2) - mu_y ** 2
27 | sigma_xy = self.sig_xy_pool(x * y) - mu_x * mu_y
28 | SSIM_n = (2 * mu_x * mu_y + self.C1) * (2 * sigma_xy + self.C2)
29 | SSIM_d = (mu_x ** 2 + mu_y ** 2 + self.C1) * (sigma_x + sigma_y + self.C2)
30 | return torch.clamp((1 - SSIM_n / SSIM_d) / 2, 0, 1)
31 |
32 |
33 | def upsample(x):
34 | return F.interpolate(x, scale_factor=2, mode="nearest")
35 |
36 |
37 | class ConvBlock(nn.Module):
38 | def __init__(self, in_channels, out_channels):
39 | super(ConvBlock, self).__init__()
40 | self.conv = Conv3x3(in_channels, out_channels)
41 | self.nonlin = nn.ELU(inplace=True)
42 | def forward(self, x):
43 | out = self.conv(x)
44 | out = self.nonlin(out)
45 | return out
46 |
47 |
48 | class Conv1x1(nn.Module):
49 | def __init__(self, in_channels, out_channels, bias=False):
50 | super(Conv1x1, self).__init__()
51 | self.conv = nn.Conv2d(int(in_channels), int(out_channels), kernel_size=1, stride=1, bias=bias)
52 | def forward(self, x):
53 | out = self.conv(x)
54 | return out
55 |
56 |
57 | class Conv3x3(nn.Module):
58 | def __init__(self, in_channels, out_channels, use_refl=True):
59 | super(Conv3x3, self).__init__()
60 | if use_refl:
61 | self.pad = nn.ReflectionPad2d(1)
62 | else:
63 | self.pad = nn.ZeroPad2d(1)
64 | self.conv = nn.Conv2d(int(in_channels), int(out_channels), 3)
65 | def forward(self, x):
66 | out = self.pad(x)
67 | out = self.conv(out)
68 | return out
69 |
70 |
71 | class Conv5x5(nn.Module):
72 | def __init__(self, in_channels, out_channels, use_refl=True):
73 | super(Conv5x5, self).__init__()
74 | if use_refl:
75 | self.pad = nn.ReflectionPad2d(2)
76 | else:
77 | self.pad = nn.ZeroPad2d(2)
78 | self.conv = nn.Conv2d(int(in_channels), int(out_channels), 5)
79 | def forward(self, x):
80 | out = self.pad(x)
81 | out = self.conv(out)
82 | return out
83 |
84 |
85 | class CRPBlock(nn.Module):
86 | def __init__(self, in_planes, out_planes, n_stages):
87 | super(CRPBlock, self).__init__()
88 | for i in range(n_stages):
89 | setattr(self, '{}_{}'.format(i + 1, 'pointwise'), Conv1x1(in_planes if (i == 0) else out_planes, out_planes, False))
90 | self.stride = 1
91 | self.n_stages = n_stages
92 | self.maxpool = nn.MaxPool2d(kernel_size=5, stride=1, padding=2)
93 |
94 | def forward(self, x):
95 | top = x
96 | for i in range(self.n_stages):
97 | top = self.maxpool(top)
98 | top = getattr(self, '{}_{}'.format(i + 1, 'pointwise'))(top)
99 | x = top + x
100 | return x
101 |
102 |
103 | def compute_depth_errors(gt, pred):
104 | thresh = torch.max((gt / pred), (pred / gt))
105 | a1 = (thresh < 1.25 ).float().mean()
106 | a2 = (thresh < 1.25 ** 2).float().mean()
107 | a3 = (thresh < 1.25 ** 3).float().mean()
108 | rmse = (gt - pred) ** 2
109 | rmse = torch.sqrt(rmse.mean())
110 | rmse_log = (torch.log(gt) - torch.log(pred)) ** 2
111 | rmse_log = torch.sqrt(rmse_log.mean())
112 | abs_rel = torch.mean(torch.abs(gt - pred) / gt)
113 | sq_rel = torch.mean((gt - pred) ** 2 / gt)
114 | return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3
--------------------------------------------------------------------------------
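Because SSIM here returns a per-pixel dissimilarity map (0 for identical patches, clamped to [0, 1]) at the same resolution as its inputs, it can be blended directly with an L1 term, which is what compute_reprojection_loss in net.py does. A small hedged sketch (import path assumed; net.py uses a robust L1 rather than the plain absolute difference shown here):

import torch
from mono.model.mono_autoencoder.layers import SSIM

ssim = SSIM()
pred = torch.rand(2, 3, 64, 64)
target = torch.rand(2, 3, 64, 64)

# Reflection padding keeps the 3x3 averaging windows from shrinking the map.
dissimilarity = ssim(pred, target)                         # (2, 3, 64, 64), values in [0, 1]
reprojection = 0.85 * dissimilarity.mean(1, True) + 0.15 * (pred - target).abs().mean(1, True)
print(dissimilarity.shape, reprojection.shape)             # (2, 3, 64, 64) and (2, 1, 64, 64)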
/mono/model/mono_autoencoder/net.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | import torch
3 | import torch.nn.functional as F
4 | import torch.nn as nn
5 |
6 | import os
7 | import matplotlib.pyplot as plt
8 |
9 | from .layers import SSIM
10 | from .encoder import Encoder
11 | from .decoder import Decoder
12 | from ..registry import MONO
13 |
14 |
15 | @MONO.register_module
16 | class autoencoder(nn.Module):
17 | def __init__(self, options):
18 | super(autoencoder, self).__init__()
19 | self.opt = options
20 |
21 | self.Encoder = Encoder(self.opt.depth_num_layers, self.opt.depth_pretrained_path)
22 | self.Decoder = Decoder(self.Encoder.num_ch_enc)
23 |
24 | self.ssim = SSIM()
25 | self.count = 0
26 |
27 | def forward(self, inputs):
28 | features = self.Encoder(inputs[("color", 0, 0)])
29 | outputs = self.Decoder(features, 0)
30 | if self.training:
31 | loss_dict = self.compute_losses(inputs, outputs, features)
32 | return outputs, loss_dict
33 | return outputs
34 |
35 | def robust_l1(self, pred, target):
36 | eps = 1e-3
37 | return torch.sqrt(torch.pow(target - pred, 2) + eps ** 2)
38 |
39 | def compute_reprojection_loss(self, pred, target):
40 | photometric_loss = self.robust_l1(pred, target).mean(1, True)
41 | ssim_loss = self.ssim(pred, target).mean(1, True)
42 | reprojection_loss = (0.85 * ssim_loss + 0.15 * photometric_loss)
43 | return reprojection_loss
44 |
45 | def compute_losses(self, inputs, outputs, features):
46 | loss_dict = {}
47 | interval = 1000
48 | target = inputs[("color", 0, 0)]
49 | for i in range(5):
50 | f = features[i]
51 | smooth_loss = self.get_smooth_loss(f, target)
52 | loss_dict[('smooth_loss', i)] = smooth_loss / (2 ** i) / 5
53 |
54 | for scale in self.opt.scales:
55 | """
56 | initialization
57 | """
58 | pred = outputs[("disp", 0, scale)]
59 |
60 | _, _, h, w = pred.size()
61 | target = F.interpolate(target, [h, w], mode="bilinear", align_corners=False)
62 | min_reconstruct_loss = self.compute_reprojection_loss(pred, target)
63 | loss_dict[('min_reconstruct_loss', scale)] = min_reconstruct_loss.mean() / len(self.opt.scales)
64 |
65 | if self.count % interval == 0:
66 | img_path = os.path.join('/node01_data5/monodepth2-test/odo', 'auto_{:0>4d}_{}.png'.format(self.count // interval, scale))
67 | plt.imsave(img_path, pred[0].transpose(0,1).transpose(1,2).data.cpu().numpy())
68 | img_path = os.path.join('/node01_data5/monodepth2-test/odo', 'img_{:0>4d}_{}.png'.format(self.count // interval, scale))
69 | plt.imsave(img_path, target[0].transpose(0, 1).transpose(1, 2).data.cpu().numpy())
70 |
71 | self.count += 1
72 | return loss_dict
73 |
74 | def get_smooth_loss(self, disp, img):
75 | b, _, h, w = disp.size()
76 | img = F.interpolate(img, (h, w), mode='area')
77 |
78 | disp_dx, disp_dy = self.gradient(disp)
79 | img_dx, img_dy = self.gradient(img)
80 |
81 | disp_dxx, disp_dxy = self.gradient(disp_dx)
82 | disp_dyx, disp_dyy = self.gradient(disp_dy)
83 |
84 | img_dxx, img_dxy = self.gradient(img_dx)
85 | img_dyx, img_dyy = self.gradient(img_dy)
86 |
87 | smooth1 = torch.mean(disp_dx.abs() * torch.exp(-img_dx.abs().mean(1, True))) + \
88 | torch.mean(disp_dy.abs() * torch.exp(-img_dy.abs().mean(1, True)))
89 |
90 | smooth2 = torch.mean(disp_dxx.abs() * torch.exp(-img_dxx.abs().mean(1, True))) + \
91 | torch.mean(disp_dxy.abs() * torch.exp(-img_dxy.abs().mean(1, True))) + \
92 | torch.mean(disp_dyx.abs() * torch.exp(-img_dyx.abs().mean(1, True))) + \
93 | torch.mean(disp_dyy.abs() * torch.exp(-img_dyy.abs().mean(1, True)))
94 |
95 | return -self.opt.dis * smooth1 + self.opt.cvt * smooth2
96 |
97 | def gradient(self, D):
98 | dy = D[:, :, 1:] - D[:, :, :-1]
99 | dx = D[:, :, :, 1:] - D[:, :, :, :-1]
100 | return dx, dy
101 |
102 |
--------------------------------------------------------------------------------
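The loss dictionary above mixes per-level feature smoothness terms with multi-scale reconstruction terms; how they are reduced to a single scalar is up to the training loop, which is not part of this file. A minimal sketch of one optimisation step, under the assumption that the entries of loss_dict are simply summed:

def training_step(model, inputs, optimizer):
    """One hedged training step for the autoencoder.

    inputs only needs the key ("color", 0, 0) holding a batch of images.
    Summing loss_dict is an assumption; the actual reduction is done by the
    training loop, not by this module.
    """
    model.train()
    outputs, loss_dict = model(inputs)        # forward() returns (outputs, losses) in training mode
    total_loss = sum(loss_dict.values())      # smooth_loss terms + min_reconstruct_loss terms
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()
    return total_loss.item(), outputs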
/mono/model/mono_autoencoder/resnet.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import torch
3 | import torch.nn as nn
4 | from torch.nn import BatchNorm2d as bn
5 |
6 | def conv3x3(in_planes, out_planes, stride=1):
7 | """3x3 convolution with padding"""
8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
9 |
10 |
11 | def conv1x1(in_planes, out_planes, stride=1):
12 | """1x1 convolution"""
13 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
14 |
15 |
16 | class BasicBlock(nn.Module):
17 | expansion = 1
18 |
19 | def __init__(self, inplanes, planes, stride=1, downsample=None):
20 | super(BasicBlock, self).__init__()
21 | self.conv1 = conv3x3(inplanes, planes, stride)
22 | self.bn1 = bn(planes)
23 | self.relu = nn.ReLU(inplace=True)
24 | self.conv2 = conv3x3(planes, planes)
25 | self.bn2 = bn(planes)
26 | self.downsample = downsample
27 | self.stride = stride
28 |
29 | def forward(self, x):
30 | residual = x
31 |
32 | out = self.conv1(x)
33 | out = self.bn1(out)
34 | out = self.relu(out)
35 |
36 | out = self.conv2(out)
37 | out = self.bn2(out)
38 |
39 | if self.downsample is not None:
40 | residual = self.downsample(x)
41 |
42 | out += residual
43 | out = self.relu(out)
44 |
45 | return out
46 |
47 |
48 | class Bottleneck(nn.Module):
49 | expansion = 4
50 |
51 | def __init__(self, inplanes, planes, stride=1, downsample=None):
52 | super(Bottleneck, self).__init__()
53 | self.conv1 = conv1x1(inplanes, planes)
54 | self.bn1 = bn(planes)
55 | self.conv2 = conv3x3(planes, planes, stride)
56 | self.bn2 = bn(planes)
57 | self.conv3 = conv1x1(planes, planes * self.expansion)
58 | self.bn3 = bn(planes * self.expansion)
59 | self.relu = nn.ReLU(inplace=True)
60 | self.downsample = downsample
61 | self.stride = stride
62 |
63 | def forward(self, x):
64 | residual = x
65 |
66 | out = self.conv1(x)
67 | out = self.bn1(out)
68 | out = self.relu(out)
69 |
70 | out = self.conv2(out)
71 | out = self.bn2(out)
72 | out = self.relu(out)
73 |
74 | out = self.conv3(out)
75 | out = self.bn3(out)
76 |
77 | if self.downsample is not None:
78 | residual = self.downsample(x)
79 |
80 | out += residual
81 | out = self.relu(out)
82 |
83 | return out
84 |
85 |
86 | class ResNet(nn.Module):
87 |
88 | def __init__(self, block, layers, num_classes=1000):
89 | super(ResNet, self).__init__()
90 | self.inplanes = 64
91 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
92 | self.bn1 = bn(64)
93 | self.relu = nn.ReLU(inplace=True)
94 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
95 | self.layer1 = self._make_layer(block, 64, layers[0])
96 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
97 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
98 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
99 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
100 | self.fc = nn.Linear(512 * block.expansion, num_classes)
101 |
102 | for m in self.modules():
103 | if isinstance(m, nn.Conv2d):
104 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
105 | elif isinstance(m, bn):
106 | nn.init.constant_(m.weight, 1)
107 | nn.init.constant_(m.bias, 0)
108 |
109 | def _make_layer(self, block, planes, blocks, stride=1):
110 | downsample = None
111 | if stride != 1 or self.inplanes != planes * block.expansion:
112 | downsample = nn.Sequential(
113 | conv1x1(self.inplanes, planes * block.expansion, stride),
114 | bn(planes * block.expansion),
115 | )
116 |
117 | layers = []
118 | layers.append(block(self.inplanes, planes, stride, downsample))
119 | self.inplanes = planes * block.expansion
120 | for _ in range(1, blocks):
121 | layers.append(block(self.inplanes, planes))
122 |
123 | return nn.Sequential(*layers)
124 |
125 | def forward(self, x):
126 | x = self.conv1(x)
127 | x = self.bn1(x)
128 | x = self.relu(x)
129 | x = self.maxpool(x)
130 |
131 | x = self.layer1(x)
132 | x = self.layer2(x)
133 | x = self.layer3(x)
134 | x = self.layer4(x)
135 |
136 | return x
137 |
138 |
139 | def resnet18(pretrained_path=None):
140 | """Constructs a ResNet-18 model.
141 | Args:
142 | pretrained_path (str, optional): path to a pre-trained weights file to load
143 | """
144 | model = ResNet(BasicBlock, [2, 2, 2, 2])
145 | if pretrained_path is not None:
146 | model.load_state_dict(torch.load(pretrained_path))
147 | print('Loaded pre-trained weights')
148 | return model
149 |
150 |
151 | def resnet34(pretrained_path=None, **kwargs):
152 | """Constructs a ResNet-34 model.
153 | Args:
154 | pretrained_path (str, optional): path to a directory containing resnet34.pth to load
155 | """
156 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
157 | if pretrained_path is not None:
158 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet34.pth')))
159 | print('Loaded pre-trained weights')
160 | return model
161 |
162 |
163 | def resnet50(pretrained_path=None, **kwargs):
164 | """Constructs a ResNet-50 model.
165 | Args:
166 | pretrained_path (str, optional): path to a directory containing resnet50.pth to load
167 | """
168 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
169 | if pretrained_path is not None:
170 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet50.pth')))
171 | print('Loaded pre-trained weights')
172 | return model
173 |
174 |
175 | def resnet101(pretrained_path=None, **kwargs):
176 | """Constructs a ResNet-101 model.
177 | Args:
178 | pretrained_path (str, optional): path to a directory containing resnet101.pth to load
179 | """
180 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
181 | if pretrained_path is not None:
182 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet101.pth')))
183 | print('Loaded pre-trained weights')
184 | return model
185 |
--------------------------------------------------------------------------------
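The four factory functions differ only in block type and per-stage block counts; since Bottleneck has expansion 4, backbones deeper than 34 layers expose four times wider features, which is exactly why the encoders above do num_ch_enc[1:] *= 4. An illustrative check:

import numpy as np

# BasicBlock (resnet18/34) keeps the nominal widths; Bottleneck (resnet50/101)
# multiplies every stage after the stem by its expansion factor of 4.
def encoder_channels(num_layers):
    num_ch_enc = np.array([64, 64, 128, 256, 512])
    if num_layers > 34:
        num_ch_enc[1:] *= 4
    return num_ch_enc

print(encoder_channels(18))    # [ 64  64 128 256 512]
print(encoder_channels(50))    # [  64  256  512 1024 2048]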
/mono/model/mono_baseline/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/model/mono_baseline/__init__.py
--------------------------------------------------------------------------------
/mono/model/mono_baseline/depth_decoder.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from .layers import Conv1x1, Conv3x3, CRPBlock, upsample
5 |
6 |
7 | class DepthDecoder(nn.Module):
8 | def __init__(self, num_ch_enc):
9 | super(DepthDecoder, self).__init__()
10 |
11 | bottleneck = 256
12 | stage = 4
13 | self.do = nn.Dropout(p=0.5)
14 |
15 | self.reduce4 = Conv1x1(num_ch_enc[4], 512, bias=False)
16 | self.reduce3 = Conv1x1(num_ch_enc[3], bottleneck, bias=False)
17 | self.reduce2 = Conv1x1(num_ch_enc[2], bottleneck, bias=False)
18 | self.reduce1 = Conv1x1(num_ch_enc[1], bottleneck, bias=False)
19 |
20 | self.iconv4 = Conv3x3(512, bottleneck)
21 | self.iconv3 = Conv3x3(bottleneck*2+1, bottleneck)
22 | self.iconv2 = Conv3x3(bottleneck*2+1, bottleneck)
23 | self.iconv1 = Conv3x3(bottleneck*2+1, bottleneck)
24 |
25 | self.crp4 = self._make_crp(bottleneck, bottleneck, stage)
26 | self.crp3 = self._make_crp(bottleneck, bottleneck, stage)
27 | self.crp2 = self._make_crp(bottleneck, bottleneck, stage)
28 | self.crp1 = self._make_crp(bottleneck, bottleneck, stage)
29 |
30 | self.merge4 = Conv3x3(bottleneck, bottleneck)
31 | self.merge3 = Conv3x3(bottleneck, bottleneck)
32 | self.merge2 = Conv3x3(bottleneck, bottleneck)
33 | self.merge1 = Conv3x3(bottleneck, bottleneck)
34 |
35 | # disp
36 | self.disp4 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid())
37 | self.disp3 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid())
38 | self.disp2 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid())
39 | self.disp1 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid())
40 |
41 | def _make_crp(self, in_planes, out_planes, stages):
42 | layers = [CRPBlock(in_planes, out_planes, stages)]
43 | return nn.Sequential(*layers)
44 |
45 | def forward(self, input_features, frame_id=0):
46 | self.outputs = {}
47 | l0, l1, l2, l3, l4 = input_features
48 |
49 | l4 = self.do(l4)
50 | l3 = self.do(l3)
51 |
52 | x4 = self.reduce4(l4)
53 | x4 = self.iconv4(x4)
54 | x4 = F.leaky_relu(x4)
55 | x4 = self.crp4(x4)
56 | x4 = self.merge4(x4)
57 | x4 = F.leaky_relu(x4)
58 | x4 = upsample(x4)
59 | disp4 = self.disp4(x4)
60 |
61 |
62 | x3 = self.reduce3(l3)
63 | x3 = torch.cat((x3, x4, disp4), 1)
64 | x3 = self.iconv3(x3)
65 | x3 = F.leaky_relu(x3)
66 | x3 = self.crp3(x3)
67 | x3 = self.merge3(x3)
68 | x3 = F.leaky_relu(x3)
69 | x3 = upsample(x3)
70 | disp3 = self.disp3(x3)
71 |
72 |
73 | x2 = self.reduce2(l2)
74 | x2 = torch.cat((x2, x3, disp3), 1)
75 | x2 = self.iconv2(x2)
76 | x2 = F.leaky_relu(x2)
77 | x2 = self.crp2(x2)
78 | x2 = self.merge2(x2)
79 | x2 = F.leaky_relu(x2)
80 | x2 = upsample(x2)
81 | disp2 = self.disp2(x2)
82 |
83 | x1 = self.reduce1(l1)
84 | x1 = torch.cat((x1, x2, disp2), 1)
85 | x1 = self.iconv1(x1)
86 | x1 = F.leaky_relu(x1)
87 | x1 = self.crp1(x1)
88 | x1 = self.merge1(x1)
89 | x1 = F.leaky_relu(x1)
90 | x1 = upsample(x1)
91 | disp1 = self.disp1(x1)
92 |
93 | self.outputs[("disp", frame_id, 3)] = disp4
94 | self.outputs[("disp", frame_id, 2)] = disp3
95 | self.outputs[("disp", frame_id, 1)] = disp2
96 | self.outputs[("disp", frame_id, 0)] = disp1
97 |
98 | return self.outputs
99 |
--------------------------------------------------------------------------------
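Unlike the plain autoencoder decoder, this one refines coarse-to-fine: each stage projects an encoder skip with a 1x1 reduce, concatenates it with the upsampled previous stage and its disparity, then applies iconv, a chained residual pooling block (CRPBlock), merge and a 1-channel sigmoid disparity head. The channel bookkeeping behind the iconv3/iconv2/iconv1 widths is easy to sanity-check:

# Why iconv3, iconv2 and iconv1 all expect bottleneck * 2 + 1 input channels.
bottleneck = 256
reduced_skip = bottleneck    # reduceN(lN): encoder skip projected to 256 channels
carried = bottleneck         # previous stage after merge + upsample, still 256 channels
coarse_disp = 1              # single-channel sigmoid disparity from the previous stage
assert reduced_skip + carried + coarse_disp == bottleneck * 2 + 1
print(reduced_skip + carried + coarse_disp)    # 513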
/mono/model/mono_baseline/depth_encoder.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | import numpy as np
3 | import torch
4 | import torch.nn as nn
5 | from .resnet import resnet18, resnet34, resnet50, resnet101
6 |
7 |
8 | class DepthEncoder(nn.Module):
9 | def __init__(self, num_layers, pretrained_path=None):
10 | super(DepthEncoder, self).__init__()
11 |
12 | self.num_ch_enc = np.array([64, 64, 128, 256, 512])
13 |
14 | resnets = {18: resnet18,
15 | 34: resnet34,
16 | 50: resnet50,
17 | 101: resnet101,}
18 |
19 | if num_layers not in resnets:
20 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers))
21 |
22 |
23 | self.encoder = resnets[num_layers]()
24 | if pretrained_path is not None:
25 | checkpoint = torch.load(pretrained_path)
26 | self.encoder.load_state_dict(checkpoint)
27 |
28 | if num_layers > 34:
29 | self.num_ch_enc[1:] *= 4
30 |
31 | # for name, param in self.encoder.named_parameters():
32 | # if 'bn' in name:
33 | # param.requires_grad = False
34 |
35 | def forward(self, input_image):
36 | self.features = []
37 | x = (input_image - 0.45) / 0.225
38 | self.features.append(self.encoder.relu(self.encoder.bn1(self.encoder.conv1(x))))
39 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1])))
40 | self.features.append(self.encoder.layer2(self.features[-1]))
41 | self.features.append(self.encoder.layer3(self.features[-1]))
42 | self.features.append(self.encoder.layer4(self.features[-1]))
43 |
44 | return self.features
45 |
--------------------------------------------------------------------------------
/mono/model/mono_baseline/pose_decoder.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | import torch.nn as nn
3 |
4 |
5 | class PoseDecoder(nn.Module):
6 | def __init__(self, num_ch_enc, stride=1):
7 | super(PoseDecoder, self).__init__()
8 |
9 | self.reduce = nn.Conv2d(num_ch_enc[-1], 256, 1)
10 | self.conv1 = nn.Conv2d(256, 256, 3, stride, 1)
11 | self.conv2 = nn.Conv2d(256, 256, 3, stride, 1)
12 | self.conv3 = nn.Conv2d(256, 6, 1)
13 |
14 | self.relu = nn.ReLU()
15 |
16 | def forward(self, input_features):
17 | f = input_features[-1]
18 | out = self.relu(self.reduce(f))
19 | out = self.relu(self.conv1(out))
20 | out = self.relu(self.conv2(out))
21 | out = self.conv3(out)
22 | out = out.mean(3).mean(2)
23 | out = 0.01 * out.view(-1, 1, 1, 6)
24 | axisangle = out[..., :3]
25 | translation = out[..., 3:]
26 | return axisangle, translation
27 |
--------------------------------------------------------------------------------
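The pose head predicts one 6-DoF motion per pair of stacked frames: a 6-channel map is averaged over space, scaled by 0.01 so early predictions stay close to the identity, and split into an axis-angle rotation and a translation. A hedged shape check (import path and feature resolution assumed):

import torch
from mono.model.mono_baseline.pose_decoder import PoseDecoder

decoder = PoseDecoder(num_ch_enc=[64, 64, 128, 256, 512])
last_feature = torch.randn(2, 512, 6, 20)      # deepest pose-encoder feature, size chosen for illustration
axisangle, translation = decoder([last_feature])
print(axisangle.shape, translation.shape)      # torch.Size([2, 1, 1, 3]) for both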
/mono/model/mono_baseline/pose_encoder.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import numpy as np
4 |
5 | import torch
6 | import torch.nn as nn
7 | from .resnet import ResNet, BasicBlock, resnet18, resnet34, resnet50, resnet101, Bottleneck
8 | from torch.nn import BatchNorm2d as bn
9 |
10 |
11 | class ResNetMultiImageInput(ResNet):
12 | def __init__(self, block, layers, num_classes=1000, num_input_images=2):
13 | super(ResNetMultiImageInput, self).__init__(block, layers)
14 | self.inplanes = 64
15 | self.conv1 = nn.Conv2d(num_input_images * 3, 64, kernel_size=7, stride=2, padding=3, bias=False)
16 | self.bn1 = bn(64)
17 | self.relu = nn.ReLU(inplace=True)
18 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
19 | self.layer1 = self._make_layer(block, 64, layers[0])
20 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
21 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
22 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
23 |
24 | for m in self.modules():
25 | if isinstance(m, nn.Conv2d):
26 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
27 | elif isinstance(m, nn.BatchNorm2d):
28 | nn.init.constant_(m.weight, 1)
29 | nn.init.constant_(m.bias, 0)
30 |
31 |
32 | def resnet_multiimage_input(num_layers, num_input_images=2, pretrained_path=None):
33 | assert num_layers in [18, 34, 50, 101], "Can only run with 18, 34, 50, 101 layers resnet"
34 | blocks = {18 : [2, 2, 2, 2],
35 | 34 : [3, 4, 6, 3],
36 | 50 : [3, 4, 6, 3],
37 | 101: [3, 4, 23, 3],
38 | }[num_layers]
39 |
40 | if num_layers < 40:
41 | model = ResNetMultiImageInput(BasicBlock, blocks, num_input_images=num_input_images)
42 | elif num_layers > 40:
43 | model = ResNetMultiImageInput(Bottleneck, blocks, num_input_images=num_input_images)
44 |
45 | if pretrained_path is not None:
46 | loaded = torch.load(pretrained_path)
47 | loaded['conv1.weight'] = torch.cat([loaded['conv1.weight']] * num_input_images, 1) / num_input_images
48 | model.load_state_dict(loaded)
49 | return model
50 |
51 |
52 | class PoseEncoder(nn.Module):
53 | def __init__(self, num_layers, pretrained_path=None, num_input_images=2):
54 | super(PoseEncoder, self).__init__()
55 |
56 | self.num_ch_enc = np.array([64, 64, 128, 256, 512])
57 |
58 | resnets = {18: resnet18,
59 | 34: resnet34,
60 | 50: resnet50,
61 | 101: resnet101,}
62 |
63 | if num_layers not in resnets:
64 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers))
65 |
66 | if num_input_images > 1:
67 | self.encoder = resnet_multiimage_input(num_layers, num_input_images, pretrained_path)
68 | else:
69 | self.encoder = resnets[num_layers]()
70 | if pretrained_path is not None:
71 | checkpoint = torch.load(pretrained_path)
72 | self.encoder.load_state_dict(checkpoint)
73 |
74 | if num_layers > 34:
75 | self.num_ch_enc[1:] *= 4
76 |
77 | # for name, param in self.encoder.named_parameters():
78 | # if 'bn' in name:
79 | # param.requires_grad = False
80 |
81 | def forward(self, input_image):
82 | self.features = []
83 | x = (input_image - 0.45) / 0.225
84 | x = self.encoder.conv1(x)
85 | x = self.encoder.bn1(x)
86 | self.features.append(self.encoder.relu(x))
87 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1])))
88 | self.features.append(self.encoder.layer2(self.features[-1]))
89 | self.features.append(self.encoder.layer3(self.features[-1]))
90 | self.features.append(self.encoder.layer4(self.features[-1]))
91 |
92 | return self.features
93 |
--------------------------------------------------------------------------------
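When pretrained weights are supplied, resnet_multiimage_input adapts the single-image stem to the stacked two-frame input by tiling conv1's kernel along the input-channel axis and dividing by the number of images, which roughly preserves the activation scale. The adaptation in isolation:

import torch

# Sketch of the conv1 weight adaptation performed in resnet_multiimage_input.
num_input_images = 2
conv1_weight = torch.randn(64, 3, 7, 7)        # stand-in for an ImageNet-pretrained kernel
adapted = torch.cat([conv1_weight] * num_input_images, 1) / num_input_images
print(adapted.shape)                           # torch.Size([64, 6, 7, 7])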
/mono/model/mono_baseline/resnet.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import torch
3 | import torch.nn as nn
4 | from torch.nn import BatchNorm2d as bn
5 |
6 | def conv3x3(in_planes, out_planes, stride=1):
7 | """3x3 convolution with padding"""
8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
9 |
10 |
11 | def conv1x1(in_planes, out_planes, stride=1):
12 | """1x1 convolution"""
13 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
14 |
15 |
16 | class BasicBlock(nn.Module):
17 | expansion = 1
18 |
19 | def __init__(self, inplanes, planes, stride=1, downsample=None):
20 | super(BasicBlock, self).__init__()
21 | self.conv1 = conv3x3(inplanes, planes, stride)
22 | self.bn1 = bn(planes)
23 | self.relu = nn.ReLU(inplace=True)
24 | self.conv2 = conv3x3(planes, planes)
25 | self.bn2 = bn(planes)
26 | self.downsample = downsample
27 | self.stride = stride
28 |
29 | def forward(self, x):
30 | residual = x
31 |
32 | out = self.conv1(x)
33 | out = self.bn1(out)
34 | out = self.relu(out)
35 |
36 | out = self.conv2(out)
37 | out = self.bn2(out)
38 |
39 | if self.downsample is not None:
40 | residual = self.downsample(x)
41 |
42 | out += residual
43 | out = self.relu(out)
44 |
45 | return out
46 |
47 |
48 | class Bottleneck(nn.Module):
49 | expansion = 4
50 |
51 | def __init__(self, inplanes, planes, stride=1, downsample=None):
52 | super(Bottleneck, self).__init__()
53 | self.conv1 = conv1x1(inplanes, planes)
54 | self.bn1 = bn(planes)
55 | self.conv2 = conv3x3(planes, planes, stride)
56 | self.bn2 = bn(planes)
57 | self.conv3 = conv1x1(planes, planes * self.expansion)
58 | self.bn3 = bn(planes * self.expansion)
59 | self.relu = nn.ReLU(inplace=True)
60 | self.downsample = downsample
61 | self.stride = stride
62 |
63 | def forward(self, x):
64 | residual = x
65 |
66 | out = self.conv1(x)
67 | out = self.bn1(out)
68 | out = self.relu(out)
69 |
70 | out = self.conv2(out)
71 | out = self.bn2(out)
72 | out = self.relu(out)
73 |
74 | out = self.conv3(out)
75 | out = self.bn3(out)
76 |
77 | if self.downsample is not None:
78 | residual = self.downsample(x)
79 |
80 | out += residual
81 | out = self.relu(out)
82 |
83 | return out
84 |
85 |
86 | class ResNet(nn.Module):
87 |
88 | def __init__(self, block, layers, num_classes=1000):
89 | super(ResNet, self).__init__()
90 | self.inplanes = 64
91 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
92 | self.bn1 = bn(64)
93 | self.relu = nn.ReLU(inplace=True)
94 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
95 | self.layer1 = self._make_layer(block, 64, layers[0])
96 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
97 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
98 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
99 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
100 | self.fc = nn.Linear(512 * block.expansion, num_classes)
101 |
102 | for m in self.modules():
103 | if isinstance(m, nn.Conv2d):
104 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
105 | elif isinstance(m, bn):
106 | nn.init.constant_(m.weight, 1)
107 | nn.init.constant_(m.bias, 0)
108 |
109 | def _make_layer(self, block, planes, blocks, stride=1):
110 | downsample = None
111 | if stride != 1 or self.inplanes != planes * block.expansion:
112 | downsample = nn.Sequential(
113 | conv1x1(self.inplanes, planes * block.expansion, stride),
114 | bn(planes * block.expansion),
115 | )
116 |
117 | layers = []
118 | layers.append(block(self.inplanes, planes, stride, downsample))
119 | self.inplanes = planes * block.expansion
120 | for _ in range(1, blocks):
121 | layers.append(block(self.inplanes, planes))
122 |
123 | return nn.Sequential(*layers)
124 |
125 | def forward(self, x):
126 | x = self.conv1(x)
127 | x = self.bn1(x)
128 | x = self.relu(x)
129 | x = self.maxpool(x)
130 |
131 | x = self.layer1(x)
132 | x = self.layer2(x)
133 | x = self.layer3(x)
134 | x = self.layer4(x)
135 |
136 | return x
137 |
138 |
139 | def resnet18(pretrained_path=None):
140 | """Constructs a ResNet-18 model.
141 | Args:
142 | pretrained_path (str, optional): path to a pre-trained weights file to load
143 | """
144 | model = ResNet(BasicBlock, [2, 2, 2, 2])
145 | if pretrained_path is not None:
146 | model.load_state_dict(torch.load(pretrained_path))
147 | print('Loaded pre-trained weights')
148 | return model
149 |
150 |
151 | def resnet34(pretrained_path=None, **kwargs):
152 | """Constructs a ResNet-34 model.
153 | Args:
154 | pretrained_path (str, optional): path to a directory containing resnet34.pth to load
155 | """
156 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
157 | if pretrained_path is not None:
158 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet34.pth')))
159 | print('Loaded pre-trained weights')
160 | return model
161 |
162 |
163 | def resnet50(pretrained_path=None, **kwargs):
164 | """Constructs a ResNet-50 model.
165 | Args:
166 | pretrained_path (str, optional): path to a directory containing resnet50.pth to load
167 | """
168 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
169 | if pretrained_path is not None:
170 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet50.pth')))
171 | print('Loaded pre-trained weights')
172 | return model
173 |
174 |
175 | def resnet101(pretrained_path=None, **kwargs):
176 | """Constructs a ResNet-101 model.
177 | Args:
178 | pretrained_path (str, optional): path to a directory containing resnet101.pth to load
179 | """
180 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
181 | if pretrained_path is not None:
182 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet101.pth')))
183 | print('Loaded pre-trained weights')
184 | return model
185 |
--------------------------------------------------------------------------------
/mono/model/mono_fm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/model/mono_fm/__init__.py
--------------------------------------------------------------------------------
/mono/model/mono_fm/depth_decoder.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from .layers import Conv1x1, Conv3x3, CRPBlock, upsample
5 |
6 |
7 | class DepthDecoder(nn.Module):
8 | def __init__(self, num_ch_enc):
9 | super(DepthDecoder, self).__init__()
10 |
11 | bottleneck = 256
12 | stage = 4
13 | self.do = nn.Dropout(p=0.5)
14 |
15 | self.reduce4 = Conv1x1(num_ch_enc[4], 512, bias=False)
16 | self.reduce3 = Conv1x1(num_ch_enc[3], bottleneck, bias=False)
17 | self.reduce2 = Conv1x1(num_ch_enc[2], bottleneck, bias=False)
18 | self.reduce1 = Conv1x1(num_ch_enc[1], bottleneck, bias=False)
19 |
20 | self.iconv4 = Conv3x3(512, bottleneck)
21 | self.iconv3 = Conv3x3(bottleneck*2+1, bottleneck)
22 | self.iconv2 = Conv3x3(bottleneck*2+1, bottleneck)
23 | self.iconv1 = Conv3x3(bottleneck*2+1, bottleneck)
24 |
25 | self.crp4 = self._make_crp(bottleneck, bottleneck, stage)
26 | self.crp3 = self._make_crp(bottleneck, bottleneck, stage)
27 | self.crp2 = self._make_crp(bottleneck, bottleneck, stage)
28 | self.crp1 = self._make_crp(bottleneck, bottleneck, stage)
29 |
30 | self.merge4 = Conv3x3(bottleneck, bottleneck)
31 | self.merge3 = Conv3x3(bottleneck, bottleneck)
32 | self.merge2 = Conv3x3(bottleneck, bottleneck)
33 | self.merge1 = Conv3x3(bottleneck, bottleneck)
34 |
35 | # disp
36 | self.disp4 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid())
37 | self.disp3 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid())
38 | self.disp2 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid())
39 | self.disp1 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid())
40 |
41 | def _make_crp(self, in_planes, out_planes, stages):
42 | layers = [CRPBlock(in_planes, out_planes, stages)]
43 | return nn.Sequential(*layers)
44 |
45 | def forward(self, input_features, frame_id=0):
46 | self.outputs = {}
47 | l0, l1, l2, l3, l4 = input_features
48 |
49 | l4 = self.do(l4)
50 | l3 = self.do(l3)
51 |
52 | x4 = self.reduce4(l4)
53 | x4 = self.iconv4(x4)
54 | x4 = F.leaky_relu(x4)
55 | x4 = self.crp4(x4)
56 | x4 = self.merge4(x4)
57 | x4 = F.leaky_relu(x4)
58 | x4 = upsample(x4)
59 | disp4 = self.disp4(x4)
60 |
61 |
62 | x3 = self.reduce3(l3)
63 | x3 = torch.cat((x3, x4, disp4), 1)
64 | x3 = self.iconv3(x3)
65 | x3 = F.leaky_relu(x3)
66 | x3 = self.crp3(x3)
67 | x3 = self.merge3(x3)
68 | x3 = F.leaky_relu(x3)
69 | x3 = upsample(x3)
70 | disp3 = self.disp3(x3)
71 |
72 |
73 | x2 = self.reduce2(l2)
74 | x2 = torch.cat((x2, x3, disp3), 1)
75 | x2 = self.iconv2(x2)
76 | x2 = F.leaky_relu(x2)
77 | x2 = self.crp2(x2)
78 | x2 = self.merge2(x2)
79 | x2 = F.leaky_relu(x2)
80 | x2 = upsample(x2)
81 | disp2 = self.disp2(x2)
82 |
83 | x1 = self.reduce1(l1)
84 | x1 = torch.cat((x1, x2, disp2), 1)
85 | x1 = self.iconv1(x1)
86 | x1 = F.leaky_relu(x1)
87 | x1 = self.crp1(x1)
88 | x1 = self.merge1(x1)
89 | x1 = F.leaky_relu(x1)
90 | x1 = upsample(x1)
91 | disp1 = self.disp1(x1)
92 |
93 | self.outputs[("disp", frame_id, 3)] = disp4
94 | self.outputs[("disp", frame_id, 2)] = disp3
95 | self.outputs[("disp", frame_id, 1)] = disp2
96 | self.outputs[("disp", frame_id, 0)] = disp1
97 |
98 | return self.outputs
99 |
--------------------------------------------------------------------------------
/mono/model/mono_fm/depth_encoder.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | import numpy as np
3 | import torch
4 | import torch.nn as nn
5 | from .resnet import resnet18, resnet34, resnet50, resnet101
6 |
7 |
8 | class DepthEncoder(nn.Module):
9 | def __init__(self, num_layers, pretrained_path=None):
10 | super(DepthEncoder, self).__init__()
11 |
12 | self.num_ch_enc = np.array([64, 64, 128, 256, 512])
13 |
14 | resnets = {18: resnet18,
15 | 34: resnet34,
16 | 50: resnet50,
17 | 101: resnet101,}
18 |
19 | if num_layers not in resnets:
20 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers))
21 |
22 |
23 | self.encoder = resnets[num_layers]()
24 | if pretrained_path is not None:
25 | checkpoint = torch.load(pretrained_path)
26 | self.encoder.load_state_dict(checkpoint)
27 |
28 | if num_layers > 34:
29 | self.num_ch_enc[1:] *= 4
30 |
31 | # for name, param in self.encoder.named_parameters():
32 | # if 'bn' in name:
33 | # param.requires_grad = False
34 |
35 | def forward(self, input_image):
36 | self.features = []
37 | x = (input_image - 0.45) / 0.225
38 | self.features.append(self.encoder.relu(self.encoder.bn1(self.encoder.conv1(x))))
39 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1])))
40 | self.features.append(self.encoder.layer2(self.features[-1]))
41 | self.features.append(self.encoder.layer3(self.features[-1]))
42 | self.features.append(self.encoder.layer4(self.features[-1]))
43 |
44 | return self.features
45 |
--------------------------------------------------------------------------------
/mono/model/mono_fm/pose_decoder.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | import torch.nn as nn
3 |
4 |
5 | class PoseDecoder(nn.Module):
6 | def __init__(self, num_ch_enc, stride=1):
7 | super(PoseDecoder, self).__init__()
8 |
9 | self.reduce = nn.Conv2d(num_ch_enc[-1], 256, 1)
10 | self.conv1 = nn.Conv2d(256, 256, 3, stride, 1)
11 | self.conv2 = nn.Conv2d(256, 256, 3, stride, 1)
12 | self.conv3 = nn.Conv2d(256, 6, 1)
13 |
14 | self.relu = nn.ReLU()
15 |
16 | def forward(self, input_features):
17 | f = input_features[-1]
18 | out = self.relu(self.reduce(f))
19 | out = self.relu(self.conv1(out))
20 | out = self.relu(self.conv2(out))
21 | out = self.conv3(out)
22 | out = out.mean(3).mean(2)
23 | out = 0.01 * out.view(-1, 1, 1, 6)
24 | axisangle = out[..., :3]
25 | translation = out[..., 3:]
26 | return axisangle, translation
27 |
--------------------------------------------------------------------------------
/mono/model/mono_fm/pose_encoder.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import numpy as np
4 |
5 | import torch
6 | import torch.nn as nn
7 | from .resnet import ResNet, BasicBlock, resnet18, resnet34, resnet50, resnet101, Bottleneck
8 | from torch.nn import BatchNorm2d as bn
9 |
10 |
11 | class ResNetMultiImageInput(ResNet):
12 | def __init__(self, block, layers, num_classes=1000, num_input_images=2):
13 | super(ResNetMultiImageInput, self).__init__(block, layers)
14 | self.inplanes = 64
15 | self.conv1 = nn.Conv2d(num_input_images * 3, 64, kernel_size=7, stride=2, padding=3, bias=False)
16 | self.bn1 = bn(64)
17 | self.relu = nn.ReLU(inplace=True)
18 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
19 | self.layer1 = self._make_layer(block, 64, layers[0])
20 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
21 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
22 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
23 |
24 | for m in self.modules():
25 | if isinstance(m, nn.Conv2d):
26 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
27 | elif isinstance(m, nn.BatchNorm2d):
28 | nn.init.constant_(m.weight, 1)
29 | nn.init.constant_(m.bias, 0)
30 |
31 |
32 | def resnet_multiimage_input(num_layers, num_input_images=2, pretrained_path=None):
33 | assert num_layers in [18, 34, 50, 101], "Can only run with 18, 34, 50, 101 layers resnet"
34 | blocks = {18 : [2, 2, 2, 2],
35 | 34 : [3, 4, 6, 3],
36 | 50 : [3, 4, 6, 3],
37 | 101: [3, 4, 23, 3],
38 | }[num_layers]
39 |
40 | if num_layers < 40:
41 | model = ResNetMultiImageInput(BasicBlock, blocks, num_input_images=num_input_images)
42 | elif num_layers > 40:
43 | model = ResNetMultiImageInput(Bottleneck, blocks, num_input_images=num_input_images)
44 |
45 | if pretrained_path is not None:
46 | loaded = torch.load(pretrained_path)
47 | loaded['conv1.weight'] = torch.cat([loaded['conv1.weight']] * num_input_images, 1) / num_input_images
48 | model.load_state_dict(loaded)
49 | return model
50 |
51 |
52 | class PoseEncoder(nn.Module):
53 | def __init__(self, num_layers, pretrained_path=None, num_input_images=2):
54 | super(PoseEncoder, self).__init__()
55 |
56 | self.num_ch_enc = np.array([64, 64, 128, 256, 512])
57 |
58 | resnets = {18: resnet18,
59 | 34: resnet34,
60 | 50: resnet50,
61 | 101: resnet101,}
62 |
63 | if num_layers not in resnets:
64 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers))
65 |
66 | if num_input_images > 1:
67 | self.encoder = resnet_multiimage_input(num_layers, num_input_images, pretrained_path)
68 | else:
69 | self.encoder = resnets[num_layers]()
70 | if pretrained_path is not None:
71 | checkpoint = torch.load(pretrained_path)
72 | self.encoder.load_state_dict(checkpoint)
73 |
74 | if num_layers > 34:
75 | self.num_ch_enc[1:] *= 4
76 |
77 | # for name, param in self.encoder.named_parameters():
78 | # if 'bn' in name:
79 | # param.requires_grad = False
80 |
81 | def forward(self, input_image):
82 | self.features = []
83 | x = (input_image - 0.45) / 0.225
84 | x = self.encoder.conv1(x)
85 | x = self.encoder.bn1(x)
86 | self.features.append(self.encoder.relu(x))
87 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1])))
88 | self.features.append(self.encoder.layer2(self.features[-1]))
89 | self.features.append(self.encoder.layer3(self.features[-1]))
90 | self.features.append(self.encoder.layer4(self.features[-1]))
91 |
92 | return self.features
93 |
--------------------------------------------------------------------------------
/mono/model/mono_fm/resnet.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import torch
3 | import torch.nn as nn
4 | from torch.nn import BatchNorm2d as bn
5 |
6 | def conv3x3(in_planes, out_planes, stride=1):
7 | """3x3 convolution with padding"""
8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
9 |
10 |
11 | def conv1x1(in_planes, out_planes, stride=1):
12 | """1x1 convolution"""
13 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
14 |
15 |
16 | class BasicBlock(nn.Module):
17 | expansion = 1
18 |
19 | def __init__(self, inplanes, planes, stride=1, downsample=None):
20 | super(BasicBlock, self).__init__()
21 | self.conv1 = conv3x3(inplanes, planes, stride)
22 | self.bn1 = bn(planes)
23 | self.relu = nn.ReLU(inplace=True)
24 | self.conv2 = conv3x3(planes, planes)
25 | self.bn2 = bn(planes)
26 | self.downsample = downsample
27 | self.stride = stride
28 |
29 | def forward(self, x):
30 | residual = x
31 |
32 | out = self.conv1(x)
33 | out = self.bn1(out)
34 | out = self.relu(out)
35 |
36 | out = self.conv2(out)
37 | out = self.bn2(out)
38 |
39 | if self.downsample is not None:
40 | residual = self.downsample(x)
41 |
42 | out += residual
43 | out = self.relu(out)
44 |
45 | return out
46 |
47 |
48 | class Bottleneck(nn.Module):
49 | expansion = 4
50 |
51 | def __init__(self, inplanes, planes, stride=1, downsample=None):
52 | super(Bottleneck, self).__init__()
53 | self.conv1 = conv1x1(inplanes, planes)
54 | self.bn1 = bn(planes)
55 | self.conv2 = conv3x3(planes, planes, stride)
56 | self.bn2 = bn(planes)
57 | self.conv3 = conv1x1(planes, planes * self.expansion)
58 | self.bn3 = bn(planes * self.expansion)
59 | self.relu = nn.ReLU(inplace=True)
60 | self.downsample = downsample
61 | self.stride = stride
62 |
63 | def forward(self, x):
64 | residual = x
65 |
66 | out = self.conv1(x)
67 | out = self.bn1(out)
68 | out = self.relu(out)
69 |
70 | out = self.conv2(out)
71 | out = self.bn2(out)
72 | out = self.relu(out)
73 |
74 | out = self.conv3(out)
75 | out = self.bn3(out)
76 |
77 | if self.downsample is not None:
78 | residual = self.downsample(x)
79 |
80 | out += residual
81 | out = self.relu(out)
82 |
83 | return out
84 |
85 |
86 | class ResNet(nn.Module):
87 |
88 | def __init__(self, block, layers, num_classes=1000):
89 | super(ResNet, self).__init__()
90 | self.inplanes = 64
91 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
92 | self.bn1 = bn(64)
93 | self.relu = nn.ReLU(inplace=True)
94 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
95 | self.layer1 = self._make_layer(block, 64, layers[0])
96 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
97 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
98 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
99 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
100 | self.fc = nn.Linear(512 * block.expansion, num_classes)
101 |
102 | for m in self.modules():
103 | if isinstance(m, nn.Conv2d):
104 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
105 | elif isinstance(m, bn):
106 | nn.init.constant_(m.weight, 1)
107 | nn.init.constant_(m.bias, 0)
108 |
109 | def _make_layer(self, block, planes, blocks, stride=1):
110 | downsample = None
111 | if stride != 1 or self.inplanes != planes * block.expansion:
112 | downsample = nn.Sequential(
113 | conv1x1(self.inplanes, planes * block.expansion, stride),
114 | bn(planes * block.expansion),
115 | )
116 |
117 | layers = []
118 | layers.append(block(self.inplanes, planes, stride, downsample))
119 | self.inplanes = planes * block.expansion
120 | for _ in range(1, blocks):
121 | layers.append(block(self.inplanes, planes))
122 |
123 | return nn.Sequential(*layers)
124 |
125 | def forward(self, x):
126 | x = self.conv1(x)
127 | x = self.bn1(x)
128 | x = self.relu(x)
129 | x = self.maxpool(x)
130 |
131 | x = self.layer1(x)
132 | x = self.layer2(x)
133 | x = self.layer3(x)
134 | x = self.layer4(x)
135 |
136 | return x
137 |
138 |
139 | def resnet18(pretrained_path=None):
140 | """Constructs a ResNet-18 model.
141 | Args:
142 | pretrained_path (str, optional): path to a pre-trained weights file to load
143 | """
144 | model = ResNet(BasicBlock, [2, 2, 2, 2])
145 | if pretrained_path is not None:
146 | model.load_state_dict(torch.load(pretrained_path))
147 | print('Loaded pre-trained weights')
148 | return model
149 |
150 |
151 | def resnet34(pretrained_path=None, **kwargs):
152 | """Constructs a ResNet-34 model.
153 | Args:
154 | pretrained_path (str, optional): path to a directory containing resnet34.pth to load
155 | """
156 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
157 | if pretrained_path is not None:
158 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet34.pth')))
159 | print('Loaded pre-trained weights')
160 | return model
161 |
162 |
163 | def resnet50(pretrained_path=None, **kwargs):
164 | """Constructs a ResNet-50 model.
165 | Args:
166 | pretrained_path (str, optional): path to a directory containing resnet50.pth to load
167 | """
168 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
169 | if pretrained_path is not None:
170 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet50.pth')))
171 | print('Loaded pre-trained weights')
172 | return model
173 |
174 |
175 | def resnet101(pretrained_path=None, **kwargs):
176 | """Constructs a ResNet-101 model.
177 | Args:
178 | pretrained_path (str, optional): path to a directory containing resnet101.pth to load
179 | """
180 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
181 | if pretrained_path is not None:
182 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet101.pth')))
183 | print('Loaded pre-trained weights')
184 | return model
185 |
--------------------------------------------------------------------------------
/mono/model/mono_fm_joint/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/model/mono_fm_joint/__init__.py
--------------------------------------------------------------------------------
/mono/model/mono_fm_joint/decoder.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | import torch.nn as nn
3 | from .layers import ConvBlock, Conv3x3, upsample
4 |
5 |
6 | class Decoder(nn.Module):
7 | def __init__(self, num_ch_enc, num_output_channels=3):
8 | super(Decoder, self).__init__()
9 |
10 | num_ch_dec = [16, 32, 64, 128, 256]
11 |
12 | # upconv
13 | self.upconv5 = ConvBlock(num_ch_enc[4], num_ch_dec[4])
14 | self.upconv4 = ConvBlock(num_ch_dec[4], num_ch_dec[3])
15 | self.upconv3 = ConvBlock(num_ch_dec[3], num_ch_dec[2])
16 | self.upconv2 = ConvBlock(num_ch_dec[2], num_ch_dec[1])
17 | self.upconv1 = ConvBlock(num_ch_dec[1], num_ch_dec[0])
18 |
19 | # iconv
20 | self.iconv5 = ConvBlock(num_ch_dec[4], num_ch_dec[4])
21 | self.iconv4 = ConvBlock(num_ch_dec[3], num_ch_dec[3])
22 | self.iconv3 = ConvBlock(num_ch_dec[2], num_ch_dec[2])
23 | self.iconv2 = ConvBlock(num_ch_dec[1], num_ch_dec[1])
24 | self.iconv1 = ConvBlock(num_ch_dec[0], num_ch_dec[0])
25 |
26 | # disp
27 | self.disp4 = Conv3x3(num_ch_dec[3], num_output_channels)
28 | self.disp3 = Conv3x3(num_ch_dec[2], num_output_channels)
29 | self.disp2 = Conv3x3(num_ch_dec[1], num_output_channels)
30 | self.disp1 = Conv3x3(num_ch_dec[0], num_output_channels)
31 |
32 | self.sigmoid = nn.Sigmoid()
33 |
34 |
35 | def forward(self, input_features, frame_id=0):
36 | self.outputs = {}
37 | _, _, _, _, econv5 = input_features
38 | # encoder feature channels: (64, 64, 128, 256, 512); multiplied by 4 for Bottleneck backbones
39 |
40 | upconv5 = upsample(self.upconv5(econv5))
41 | iconv5 = self.iconv5(upconv5)
42 |
43 | upconv4 = upsample(self.upconv4(iconv5))
44 | iconv4 = self.iconv4(upconv4)
45 |
46 | upconv3 = upsample(self.upconv3(iconv4))
47 | iconv3 = self.iconv3(upconv3)
48 |
49 | upconv2 = upsample(self.upconv2(iconv3))
50 | iconv2 = self.iconv2(upconv2)
51 |
52 | upconv1 = upsample(self.upconv1(iconv2))
53 | iconv1 = self.iconv1(upconv1)
54 |
55 | self.outputs[("res_img", frame_id, 3)] = self.sigmoid(self.disp4(iconv4))
56 | self.outputs[("res_img", frame_id, 2)] = self.sigmoid(self.disp3(iconv3))
57 | self.outputs[("res_img", frame_id, 1)] = self.sigmoid(self.disp2(iconv2))
58 | self.outputs[("res_img", frame_id, 0)] = self.sigmoid(self.disp1(iconv1))
59 | return self.outputs
--------------------------------------------------------------------------------
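Despite reusing the disp-named heads, this decoder reconstructs the input image itself: its outputs default to three channels and are keyed ("res_img", frame_id, scale). A hedged end-to-end check with the matching Encoder (import paths and input size assumed):

import torch
from mono.model.mono_fm_joint.encoder import Encoder
from mono.model.mono_fm_joint.decoder import Decoder

encoder = Encoder(num_layers=18, pretrained_path=None)
decoder = Decoder(encoder.num_ch_enc, num_output_channels=3)
features = encoder(torch.randn(1, 3, 192, 640))
outputs = decoder(features, frame_id=0)
print(outputs[("res_img", 0, 0)].shape)        # torch.Size([1, 3, 192, 640]), the full-resolution reconstruction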
/mono/model/mono_fm_joint/depth_decoder.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from .layers import Conv1x1, Conv3x3, CRPBlock, upsample
5 |
6 |
7 | class DepthDecoder(nn.Module):
8 | def __init__(self, num_ch_enc):
9 | super(DepthDecoder, self).__init__()
10 |
11 | bottleneck = 256
12 | stage = 4
13 | self.do = nn.Dropout(p=0.5)
14 |
15 | self.reduce4 = Conv1x1(num_ch_enc[4], 512, bias=False)
16 | self.reduce3 = Conv1x1(num_ch_enc[3], bottleneck, bias=False)
17 | self.reduce2 = Conv1x1(num_ch_enc[2], bottleneck, bias=False)
18 | self.reduce1 = Conv1x1(num_ch_enc[1], bottleneck, bias=False)
19 |
20 | self.iconv4 = Conv3x3(512, bottleneck)
21 | self.iconv3 = Conv3x3(bottleneck*2+1, bottleneck)
22 | self.iconv2 = Conv3x3(bottleneck*2+1, bottleneck)
23 | self.iconv1 = Conv3x3(bottleneck*2+1, bottleneck)
24 |
25 | self.crp4 = self._make_crp(bottleneck, bottleneck, stage)
26 | self.crp3 = self._make_crp(bottleneck, bottleneck, stage)
27 | self.crp2 = self._make_crp(bottleneck, bottleneck, stage)
28 | self.crp1 = self._make_crp(bottleneck, bottleneck, stage)
29 |
30 | self.merge4 = Conv3x3(bottleneck, bottleneck)
31 | self.merge3 = Conv3x3(bottleneck, bottleneck)
32 | self.merge2 = Conv3x3(bottleneck, bottleneck)
33 | self.merge1 = Conv3x3(bottleneck, bottleneck)
34 |
35 | # disp
36 | self.disp4 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid())
37 | self.disp3 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid())
38 | self.disp2 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid())
39 | self.disp1 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid())
40 |
41 | def _make_crp(self, in_planes, out_planes, stages):
42 | layers = [CRPBlock(in_planes, out_planes, stages)]
43 | return nn.Sequential(*layers)
44 |
45 | def forward(self, input_features, frame_id=0):
46 | self.outputs = {}
47 | l0, l1, l2, l3, l4 = input_features
48 |
49 | l4 = self.do(l4)
50 | l3 = self.do(l3)
51 |
52 | x4 = self.reduce4(l4)
53 | x4 = self.iconv4(x4)
54 | x4 = F.leaky_relu(x4)
55 | x4 = self.crp4(x4)
56 | x4 = self.merge4(x4)
57 | x4 = F.leaky_relu(x4)
58 | x4 = upsample(x4)
59 | disp4 = self.disp4(x4)
60 |
61 |
62 | x3 = self.reduce3(l3)
63 | x3 = torch.cat((x3, x4, disp4), 1)
64 | x3 = self.iconv3(x3)
65 | x3 = F.leaky_relu(x3)
66 | x3 = self.crp3(x3)
67 | x3 = self.merge3(x3)
68 | x3 = F.leaky_relu(x3)
69 | x3 = upsample(x3)
70 | disp3 = self.disp3(x3)
71 |
72 |
73 | x2 = self.reduce2(l2)
74 | x2 = torch.cat((x2, x3, disp3), 1)
75 | x2 = self.iconv2(x2)
76 | x2 = F.leaky_relu(x2)
77 | x2 = self.crp2(x2)
78 | x2 = self.merge2(x2)
79 | x2 = F.leaky_relu(x2)
80 | x2 = upsample(x2)
81 | disp2 = self.disp2(x2)
82 |
83 | x1 = self.reduce1(l1)
84 | x1 = torch.cat((x1, x2, disp2), 1)
85 | x1 = self.iconv1(x1)
86 | x1 = F.leaky_relu(x1)
87 | x1 = self.crp1(x1)
88 | x1 = self.merge1(x1)
89 | x1 = F.leaky_relu(x1)
90 | x1 = upsample(x1)
91 | disp1 = self.disp1(x1)
92 |
93 | self.outputs[("disp", frame_id, 3)] = disp4
94 | self.outputs[("disp", frame_id, 2)] = disp3
95 | self.outputs[("disp", frame_id, 1)] = disp2
96 | self.outputs[("disp", frame_id, 0)] = disp1
97 |
98 | return self.outputs
99 |
--------------------------------------------------------------------------------
/mono/model/mono_fm_joint/depth_encoder.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | import numpy as np
3 | import torch
4 | import torch.nn as nn
5 | from .resnet import resnet18, resnet34, resnet50, resnet101
6 |
7 |
8 | class DepthEncoder(nn.Module):
9 | def __init__(self, num_layers, pretrained_path=None):
10 | super(DepthEncoder, self).__init__()
11 |
12 | self.num_ch_enc = np.array([64, 64, 128, 256, 512])
13 |
14 | resnets = {18: resnet18,
15 | 34: resnet34,
16 | 50: resnet50,
17 | 101: resnet101,}
18 |
19 | if num_layers not in resnets:
20 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers))
21 |
22 |
23 | self.encoder = resnets[num_layers]()
24 | if pretrained_path is not None:
25 | checkpoint = torch.load(pretrained_path)
26 | self.encoder.load_state_dict(checkpoint)
27 |
28 | if num_layers > 34:
29 | self.num_ch_enc[1:] *= 4
30 |
31 | # for name, param in self.encoder.named_parameters():
32 | # if 'bn' in name:
33 | # param.requires_grad = False
34 |
35 | def forward(self, input_image):
36 | self.features = []
37 | x = (input_image - 0.45) / 0.225
38 | self.features.append(self.encoder.relu(self.encoder.bn1(self.encoder.conv1(x))))
39 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1])))
40 | self.features.append(self.encoder.layer2(self.features[-1]))
41 | self.features.append(self.encoder.layer3(self.features[-1]))
42 | self.features.append(self.encoder.layer4(self.features[-1]))
43 |
44 | return self.features
45 |
--------------------------------------------------------------------------------
/mono/model/mono_fm_joint/encoder.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | import numpy as np
3 | import torch
4 | import torch.nn as nn
5 | from .resnet import resnet18, resnet34, resnet50, resnet101
6 |
7 |
8 | class Encoder(nn.Module):
9 | def __init__(self, num_layers, pretrained_path=None):
10 | super(Encoder, self).__init__()
11 |
12 | self.num_ch_enc = np.array([64, 64, 128, 256, 512])
13 |
14 | resnets = {18: resnet18,
15 | 34: resnet34,
16 | 50: resnet50,
17 | 101: resnet101,}
18 |
19 | if num_layers not in resnets:
20 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers))
21 |
22 |
23 | self.encoder = resnets[num_layers]()
24 | if pretrained_path is not None:
25 | checkpoint = torch.load(pretrained_path)
26 | self.encoder.load_state_dict(checkpoint)
27 |
28 | if num_layers > 34:
29 | self.num_ch_enc[1:] *= 4
30 |
31 | # for name, param in self.encoder.named_parameters():
32 | # if 'bn' in name:
33 | # param.requires_grad = False
34 |
35 | def forward(self, input_image):
36 | self.features = []
37 | self.features.append(self.encoder.relu(self.encoder.bn1(self.encoder.conv1(input_image))))
38 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1])))
39 | self.features.append(self.encoder.layer2(self.features[-1]))
40 | self.features.append(self.encoder.layer3(self.features[-1]))
41 | self.features.append(self.encoder.layer4(self.features[-1]))
42 |
43 | return self.features
44 |
--------------------------------------------------------------------------------
/mono/model/mono_fm_joint/pose_decoder.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | import torch.nn as nn
3 |
4 |
5 | class PoseDecoder(nn.Module):
6 | def __init__(self, num_ch_enc, stride=1):
7 | super(PoseDecoder, self).__init__()
8 |
9 | self.reduce = nn.Conv2d(num_ch_enc[-1], 256, 1)
10 | self.conv1 = nn.Conv2d(256, 256, 3, stride, 1)
11 | self.conv2 = nn.Conv2d(256, 256, 3, stride, 1)
12 | self.conv3 = nn.Conv2d(256, 6, 1)
13 |
14 | self.relu = nn.ReLU()
15 |
16 | def forward(self, input_features):
17 | f = input_features[-1]
18 | out = self.relu(self.reduce(f))
19 | out = self.relu(self.conv1(out))
20 | out = self.relu(self.conv2(out))
21 | out = self.conv3(out)
22 | out = out.mean(3).mean(2)
23 | out = 0.01 * out.view(-1, 1, 1, 6)
24 | axisangle = out[..., :3]
25 | translation = out[..., 3:]
26 | return axisangle, translation
27 |
--------------------------------------------------------------------------------
/mono/model/mono_fm_joint/pose_encoder.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import numpy as np
4 |
5 | import torch
6 | import torch.nn as nn
7 | from .resnet import ResNet, BasicBlock, resnet18, resnet34, resnet50, resnet101, Bottleneck
8 | from torch.nn import BatchNorm2d as bn
9 |
10 |
11 | class ResNetMultiImageInput(ResNet):
12 | def __init__(self, block, layers, num_classes=1000, num_input_images=2):
13 | super(ResNetMultiImageInput, self).__init__(block, layers)
14 | self.inplanes = 64
15 | self.conv1 = nn.Conv2d(num_input_images * 3, 64, kernel_size=7, stride=2, padding=3, bias=False)
16 | self.bn1 = bn(64)
17 | self.relu = nn.ReLU(inplace=True)
18 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
19 | self.layer1 = self._make_layer(block, 64, layers[0])
20 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
21 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
22 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
23 |
24 | for m in self.modules():
25 | if isinstance(m, nn.Conv2d):
26 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
27 | elif isinstance(m, nn.BatchNorm2d):
28 | nn.init.constant_(m.weight, 1)
29 | nn.init.constant_(m.bias, 0)
30 |
31 |
32 | def resnet_multiimage_input(num_layers, num_input_images=2, pretrained_path=None):
33 | assert num_layers in [18, 34, 50, 101], "Can only run with 18, 34, 50, 101 layers resnet"
34 | blocks = {18 : [2, 2, 2, 2],
35 | 34 : [3, 4, 6, 3],
36 | 50 : [3, 4, 6, 3],
37 | 101: [3, 4, 23, 3],
38 | }[num_layers]
39 |
40 | if num_layers < 40:
41 | model = ResNetMultiImageInput(BasicBlock, blocks, num_input_images=num_input_images)
42 | elif num_layers > 40:
43 | model = ResNetMultiImageInput(Bottleneck, blocks, num_input_images=num_input_images)
44 |
45 | if pretrained_path is not None:
46 | loaded = torch.load(pretrained_path)
47 | loaded['conv1.weight'] = torch.cat([loaded['conv1.weight']] * num_input_images, 1) / num_input_images
48 | model.load_state_dict(loaded)
49 | return model
50 |
51 |
52 | class PoseEncoder(nn.Module):
53 | def __init__(self, num_layers, pretrained_path=None, num_input_images=2):
54 | super(PoseEncoder, self).__init__()
55 |
56 | self.num_ch_enc = np.array([64, 64, 128, 256, 512])
57 |
58 | resnets = {18: resnet18,
59 | 34: resnet34,
60 | 50: resnet50,
61 | 101: resnet101,}
62 |
63 | if num_layers not in resnets:
64 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers))
65 |
66 | if num_input_images > 1:
67 | self.encoder = resnet_multiimage_input(num_layers, num_input_images, pretrained_path)
68 | else:
69 | self.encoder = resnets[num_layers]()
70 | if pretrained_path is not None:
71 | checkpoint = torch.load(pretrained_path)
72 | self.encoder.load_state_dict(checkpoint)
73 |
74 | if num_layers > 34:
75 | self.num_ch_enc[1:] *= 4
76 |
77 | # for name, param in self.encoder.named_parameters():
78 | # if 'bn' in name:
79 | # param.requires_grad = False
80 |
81 | def forward(self, input_image):
82 | self.features = []
83 | x = (input_image - 0.45) / 0.225
84 | x = self.encoder.conv1(x)
85 | x = self.encoder.bn1(x)
86 | self.features.append(self.encoder.relu(x))
87 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1])))
88 | self.features.append(self.encoder.layer2(self.features[-1]))
89 | self.features.append(self.encoder.layer3(self.features[-1]))
90 | self.features.append(self.encoder.layer4(self.features[-1]))
91 |
92 | return self.features
93 |
--------------------------------------------------------------------------------
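The only non-standard step in resnet_multiimage_input above is adapting a single-image conv1.weight (64x3x7x7) to a stacked input of num_input_images * 3 channels. A sketch of that weight surgery in isolation, with a random tensor standing in for an ImageNet checkpoint:

import torch

num_input_images = 2
single_image_w = torch.randn(64, 3, 7, 7)  # stand-in for a pretrained conv1.weight

# Tile the kernel along the input-channel axis and rescale so the response to
# two identical frames matches the original single-image response.
multi_image_w = torch.cat([single_image_w] * num_input_images, 1) / num_input_images
print(multi_image_w.shape)  # torch.Size([64, 6, 7, 7])
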
/mono/model/mono_fm_joint/resnet.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import torch
3 | import torch.nn as nn
4 | from torch.nn import BatchNorm2d as bn
5 |
6 | def conv3x3(in_planes, out_planes, stride=1):
7 | """3x3 convolution with padding"""
8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
9 |
10 |
11 | def conv1x1(in_planes, out_planes, stride=1):
12 | """1x1 convolution"""
13 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
14 |
15 |
16 | class BasicBlock(nn.Module):
17 | expansion = 1
18 |
19 | def __init__(self, inplanes, planes, stride=1, downsample=None):
20 | super(BasicBlock, self).__init__()
21 | self.conv1 = conv3x3(inplanes, planes, stride)
22 | self.bn1 = bn(planes)
23 | self.relu = nn.ReLU(inplace=True)
24 | self.conv2 = conv3x3(planes, planes)
25 | self.bn2 = bn(planes)
26 | self.downsample = downsample
27 | self.stride = stride
28 |
29 | def forward(self, x):
30 | residual = x
31 |
32 | out = self.conv1(x)
33 | out = self.bn1(out)
34 | out = self.relu(out)
35 |
36 | out = self.conv2(out)
37 | out = self.bn2(out)
38 |
39 | if self.downsample is not None:
40 | residual = self.downsample(x)
41 |
42 | out += residual
43 | out = self.relu(out)
44 |
45 | return out
46 |
47 |
48 | class Bottleneck(nn.Module):
49 | expansion = 4
50 |
51 | def __init__(self, inplanes, planes, stride=1, downsample=None):
52 | super(Bottleneck, self).__init__()
53 | self.conv1 = conv1x1(inplanes, planes)
54 | self.bn1 = bn(planes)
55 | self.conv2 = conv3x3(planes, planes, stride)
56 | self.bn2 = bn(planes)
57 | self.conv3 = conv1x1(planes, planes * self.expansion)
58 | self.bn3 = bn(planes * self.expansion)
59 | self.relu = nn.ReLU(inplace=True)
60 | self.downsample = downsample
61 | self.stride = stride
62 |
63 | def forward(self, x):
64 | residual = x
65 |
66 | out = self.conv1(x)
67 | out = self.bn1(out)
68 | out = self.relu(out)
69 |
70 | out = self.conv2(out)
71 | out = self.bn2(out)
72 | out = self.relu(out)
73 |
74 | out = self.conv3(out)
75 | out = self.bn3(out)
76 |
77 | if self.downsample is not None:
78 | residual = self.downsample(x)
79 |
80 | out += residual
81 | out = self.relu(out)
82 |
83 | return out
84 |
85 |
86 | class ResNet(nn.Module):
87 |
88 | def __init__(self, block, layers, num_classes=1000):
89 | super(ResNet, self).__init__()
90 | self.inplanes = 64
91 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
92 | self.bn1 = bn(64)
93 | self.relu = nn.ReLU(inplace=True)
94 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
95 | self.layer1 = self._make_layer(block, 64, layers[0])
96 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
97 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
98 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
99 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
100 | self.fc = nn.Linear(512 * block.expansion, num_classes)
101 |
102 | for m in self.modules():
103 | if isinstance(m, nn.Conv2d):
104 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
105 | elif isinstance(m, bn):
106 | nn.init.constant_(m.weight, 1)
107 | nn.init.constant_(m.bias, 0)
108 |
109 | def _make_layer(self, block, planes, blocks, stride=1):
110 | downsample = None
111 | if stride != 1 or self.inplanes != planes * block.expansion:
112 | downsample = nn.Sequential(
113 | conv1x1(self.inplanes, planes * block.expansion, stride),
114 | bn(planes * block.expansion),
115 | )
116 |
117 | layers = []
118 | layers.append(block(self.inplanes, planes, stride, downsample))
119 | self.inplanes = planes * block.expansion
120 | for _ in range(1, blocks):
121 | layers.append(block(self.inplanes, planes))
122 |
123 | return nn.Sequential(*layers)
124 |
125 | def forward(self, x):
126 | x = self.conv1(x)
127 | x = self.bn1(x)
128 | x = self.relu(x)
129 | x = self.maxpool(x)
130 |
131 | x = self.layer1(x)
132 | x = self.layer2(x)
133 | x = self.layer3(x)
134 | x = self.layer4(x)
135 |
136 | return x
137 |
138 |
139 | def resnet18(pretrained_path=None):
140 | """Constructs a ResNet-18 model.
141 | Args:
142 | pretrained (bool): If True, returns a model pre-trained on ImageNet
143 | """
144 | model = ResNet(BasicBlock, [2, 2, 2, 2])
145 | if pretrained_path is not None:
146 | model.load_state_dict(torch.load(pretrained_path))
147 | print('Loaded pre-trained weights')
148 | return model
149 |
150 |
151 | def resnet34(pretrained_path=None, **kwargs):
152 | """Constructs a ResNet-34 model.
153 | Args:
154 | pretrained (bool): If True, returns a model pre-trained on ImageNet
155 | """
156 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
157 | if pretrained_path is not None:
158 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet34.pth')))
159 | print('Loaded pre-trained weights')
160 | return model
161 |
162 |
163 | def resnet50(pretrained_path=None, **kwargs):
164 | """Constructs a ResNet-50 model.
165 | Args:
166 | pretrained (bool): If True, returns a model pre-trained on ImageNet
167 | """
168 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
169 | if pretrained_path is not None:
170 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet50.pth')))
171 | print('Loaded pre-trained weights')
172 | return model
173 |
174 |
175 | def resnet101(pretrained_path=None, **kwargs):
176 | """Constructs a ResNet-101 model.
177 | Args:
178 | pretrained (bool): If True, returns a model pre-trained on ImageNet
179 | """
180 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
181 | if pretrained_path is not None:
182 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet101.pth')))
183 | print('Loaded pre-trained weights')
184 | return model
185 |
--------------------------------------------------------------------------------
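These constructors return backbones whose forward pass stops at layer4 (no pooling or fc). A small sketch of why the encoders above multiply num_ch_enc[1:] by 4 for deeper variants: Bottleneck blocks expand channels by a factor of 4, so ResNet-50 ends at 2048 channels where ResNet-18 ends at 512.

import torch
from mono.model.mono_fm_joint.resnet import resnet18, resnet50

x = torch.randn(1, 3, 64, 64)
print(resnet18()(x).shape)  # torch.Size([1, 512, 2, 2])
print(resnet50()(x).shape)  # torch.Size([1, 2048, 2, 2])
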
/mono/model/registry.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai)
4 |
5 | import torch
6 | import torch.nn as nn
7 |
8 | class Registry(object):
9 | def __init__(self, name):
10 | self._name = name
11 | self._module_dict = dict()
12 |
13 | @property
14 | def name(self):
15 | return self._name
16 |
17 | @property
18 | def module_dict(self):
19 | return self._module_dict
20 |
21 | def _register_module(self, module_class):
22 | """Register a module.
23 |
24 | Args:
 25 |             module_class (type): Module class to be registered; must be a subclass of :obj:`nn.Module`.
26 | """
27 | if not issubclass(module_class, nn.Module):
28 | raise TypeError(
29 | 'module must be a child of nn.Module, but got {}'.format(
30 | module_class))
31 | module_name = module_class.__name__
32 | if module_name in self._module_dict:
33 | raise KeyError('{} is already registered in {}'.format(
34 | module_name, self.name))
35 | self._module_dict[module_name] = module_class
36 |
37 | def register_module(self, cls):
38 | self._register_module(cls)
39 | return cls
40 |
41 | MONO = Registry('mono')
42 |
--------------------------------------------------------------------------------
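A minimal usage sketch for the registry (TinyNet is a hypothetical class, not part of the repo): register_module acts as a class decorator, and models are later instantiated by name through module_dict, exactly as train.py does with MONO.module_dict[cfg.model['name']].

import torch.nn as nn
from mono.model.registry import Registry

DEMO = Registry('demo')

@DEMO.register_module
class TinyNet(nn.Module):
    def __init__(self):
        super(TinyNet, self).__init__()
        self.fc = nn.Linear(4, 2)

    def forward(self, x):
        return self.fc(x)

# Lookup by class name, then instantiate.
model = DEMO.module_dict['TinyNet']()
print(model)
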
/mono/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/tools/__init__.py
--------------------------------------------------------------------------------
/mono/tools/geometry.py:
--------------------------------------------------------------------------------
1 | """
2 | Provides generic geometry algorithms.
3 | author: Michael Grupp
4 | This file is part of evo (github.com/MichaelGrupp/evo).
5 | evo is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 | evo is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 | You should have received a copy of the GNU General Public License
14 | along with evo. If not, see <https://www.gnu.org/licenses/>.
15 | """
16 |
17 | import numpy as np
18 |
19 |
20 | def umeyama_alignment(x, y, with_scale=False):
21 | """
22 |     Computes the least squares solution parameters of a Sim(m) matrix
23 | that minimizes the distance between a set of registered points.
24 | Umeyama, Shinji: Least-squares estimation of transformation parameters
25 | between two point patterns. IEEE PAMI, 1991
26 | :param x: mxn matrix of points, m = dimension, n = nr. of data points
27 | :param y: mxn matrix of points, m = dimension, n = nr. of data points
28 | :param with_scale: set to True to align also the scale (default: 1.0 scale)
29 | :return: r, t, c - rotation matrix, translation vector and scale factor
30 | """
31 | if x.shape != y.shape:
32 | print("data matrices must have the same shape")
33 |
34 | # m = dimension, n = nr. of data points
35 | m, n = x.shape
36 |
37 | # means, eq. 34 and 35
38 | mean_x = x.mean(axis=1)
39 | mean_y = y.mean(axis=1)
40 |
41 | # variance, eq. 36
42 | # "transpose" for column subtraction
43 | sigma_x = 1.0 / n * (np.linalg.norm(x - mean_x[:, np.newaxis])**2)
44 |
45 | # covariance matrix, eq. 38
46 | outer_sum = np.zeros((m, m))
47 | for i in range(n):
48 | outer_sum += np.outer((y[:, i] - mean_y), (x[:, i] - mean_x))
49 | cov_xy = np.multiply(1.0 / n, outer_sum)
50 |
51 | # SVD (text betw. eq. 38 and 39)
52 | u, d, v = np.linalg.svd(cov_xy)
53 |
54 | # S matrix, eq. 43
55 | s = np.eye(m)
56 | if np.linalg.det(u) * np.linalg.det(v) < 0.0:
57 | # Ensure a RHS coordinate system (Kabsch algorithm).
58 | s[m - 1, m - 1] = -1
59 |
60 | # rotation, eq. 40
61 | r = u.dot(s).dot(v)
62 |
63 | # scale & translation, eq. 42 and 41
64 | c = 1 / sigma_x * np.trace(np.diag(d).dot(s)) if with_scale else 1.0
65 | t = mean_y - np.multiply(c, r.dot(mean_x))
66 |
67 | return r, t, c
68 |
69 |
70 | def arc_len(x):
71 | """
72 | :param x: nxm array of points, m=dimension
73 | :return: the (discrete approximated) arc-length of the point sequence
74 | """
75 | return np.sum(np.linalg.norm(x[:-1] - x[1:], axis=1))
76 |
77 |
78 | def accumulated_distances(x):
79 | """
80 | :param x: nxm array of points, m=dimension
81 | :return: the accumulated distances along the point sequence
82 | """
83 | return np.concatenate((np.array([0]),
84 | np.cumsum(np.linalg.norm(x[:-1] - x[1:], axis=1))))
--------------------------------------------------------------------------------
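A self-check sketch for umeyama_alignment with synthetic, noise-free data (assumes the repo root is on the Python path): points are transformed by a known rotation, translation and scale, and the function recovers all three.

import numpy as np
from mono.tools.geometry import umeyama_alignment

rng = np.random.RandomState(0)
x = rng.rand(3, 100)  # 3 x n source points

theta = np.deg2rad(30)  # known similarity transform: rotation about z, translation, scale 2
r_true = np.array([[np.cos(theta), -np.sin(theta), 0.0],
                   [np.sin(theta),  np.cos(theta), 0.0],
                   [0.0, 0.0, 1.0]])
t_true = np.array([1.0, -2.0, 0.5])
c_true = 2.0
y = c_true * r_true.dot(x) + t_true[:, np.newaxis]

r, t, c = umeyama_alignment(x, y, with_scale=True)
print(np.allclose(r, r_true), np.allclose(t, t_true), np.isclose(c, c_true))  # True True True
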
/mono/tools/lie_algebra.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF8 -*-
2 | """
3 | Provides functions for Lie group calculations.
4 | author: Michael Grupp
5 | This file is part of evo (github.com/MichaelGrupp/evo).
6 | evo is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU General Public License as published by
8 | the Free Software Foundation, either version 3 of the License, or
9 | (at your option) any later version.
10 | evo is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 | You should have received a copy of the GNU General Public License
15 | along with evo. If not, see <https://www.gnu.org/licenses/>.
16 | """
17 |
18 | import numpy as np
19 | import scipy.linalg as sl
20 |
21 | import mono.tools.transformations as tr
22 |
23 |
24 | def hat(v):
25 | """
26 | :param v: 3x1 vector
27 | :return: 3x3 skew symmetric matrix
28 | """
29 | # yapf: disable
30 | return np.array([[0.0, -v[2], v[1]],
31 | [v[2], 0.0, -v[0]],
32 | [-v[1], v[0], 0.0]])
33 | # yapf: enable
34 |
35 |
36 | def vee(m):
37 | """
38 | :param m: 3x3 skew symmetric matrix
39 | :return: 3x1 vector
40 | """
41 | return np.array([-m[1, 2], m[0, 2], -m[0, 1]])
42 |
43 |
44 | def so3_exp(axis, angle):
45 | """
46 | Computes an SO(3) matrix from an axis/angle representation.
47 | Code source: http://stackoverflow.com/a/25709323
48 | :param axis: 3x1 rotation axis (unit vector!)
49 | :param angle: radians
50 | :return: SO(3) rotation matrix (matrix exponential of so(3))
51 | """
52 | return sl.expm(np.cross(np.eye(3), axis / np.linalg.norm(axis) * angle))
53 |
54 |
55 | def so3_log(r, return_angle_only=True, return_skew=False):
56 | """
57 | :param r: SO(3) rotation matrix
58 | :param return_angle_only: return only the angle (default)
59 | :param return_skew: return skew symmetric Lie algebra element
60 | :return: axis/angle
61 | or if skew:
62 | 3x3 skew symmetric logarithmic map in so(3) (Ma, Soatto eq. 2.8)
63 | """
64 | if not is_so3(r):
65 | print("matrix is not a valid SO(3) group element")
66 | if return_angle_only and not return_skew:
67 | return np.arccos(min(1, max(-1, (np.trace(r) - 1) / 2)))
68 | angle, axis, _ = tr.rotation_from_matrix(se3(r, [0, 0, 0]))
69 | if return_skew:
70 | return hat(axis * angle)
71 | else:
72 | return axis, angle
73 |
74 |
75 | def se3(r=np.eye(3), t=np.array([0, 0, 0])):
76 | """
77 | :param r: SO(3) rotation matrix
78 | :param t: 3x1 translation vector
79 | :return: SE(3) transformation matrix
80 | """
81 | se3 = np.eye(4)
82 | se3[:3, :3] = r
83 | se3[:3, 3] = t
84 | return se3
85 |
86 |
87 | def sim3(r, t, s):
88 | """
89 | :param r: SO(3) rotation matrix
90 | :param t: 3x1 translation vector
91 | :param s: positive, non-zero scale factor
92 | :return: Sim(3) similarity transformation matrix
93 | """
94 | sim3 = np.eye(4)
95 | sim3[:3, :3] = s * r
96 | sim3[:3, 3] = t
97 | return sim3
98 |
99 |
100 | def so3_from_se3(p):
101 | """
102 | :param p: absolute SE(3) pose
103 | :return: the SO(3) rotation matrix in p
104 | """
105 | return p[:3, :3]
106 |
107 |
108 | def se3_inverse(p):
109 | """
110 | :param p: absolute SE(3) pose
111 | :return: the inverted pose
112 | """
113 | r_inv = p[:3, :3].transpose()
114 | t_inv = -r_inv.dot(p[:3, 3])
115 | return se3(r_inv, t_inv)
116 |
117 |
118 | def is_so3(r):
119 | """
120 | :param r: a 3x3 matrix
121 | :return: True if r is in the SO(3) group
122 | """
123 | # Check the determinant.
124 | det_valid = np.isclose(np.linalg.det(r), [1.0], atol=1e-6)
125 | # Check if the transpose is the inverse.
126 | inv_valid = np.allclose(r.transpose().dot(r), np.eye(3), atol=1e-6)
127 | return det_valid and inv_valid
128 |
129 |
130 | def is_se3(p):
131 | """
132 | :param p: a 4x4 matrix
133 | :return: True if p is in the SE(3) group
134 | """
135 | rot_valid = is_so3(p[:3, :3])
136 | lower_valid = np.equal(p[3, :], np.array([0.0, 0.0, 0.0, 1.0])).all()
137 | return rot_valid and lower_valid
138 |
139 |
140 | def is_sim3(p, s):
141 | """
142 | :param p: a 4x4 matrix
143 | :param s: expected scale factor
144 | :return: True if p is in the Sim(3) group with scale s
145 | """
146 | rot = p[:3, :3]
147 | rot_unscaled = np.multiply(rot, 1.0 / s)
148 | rot_valid = is_so3(rot_unscaled)
149 | lower_valid = np.equal(p[3, :], np.array([0.0, 0.0, 0.0, 1.0])).all()
150 | return rot_valid and lower_valid
151 |
152 |
153 | def relative_so3(r1, r2):
154 | """
155 | :param r1, r2: SO(3) matrices
156 |     :return: the relative rotation r1^{-1} * r2
157 | """
158 | return np.dot(r1.transpose(), r2)
159 |
160 |
161 | def relative_se3(p1, p2):
162 | """
163 | :param p1, p2: SE(3) matrices
164 | :return: the relative transformation p1^{⁻1} * p2
165 | """
166 | return np.dot(se3_inverse(p1), p2)
167 |
168 |
169 | def random_so3():
170 | """
171 | :return: a random SO(3) matrix (for debugging)
172 | """
173 | return tr.random_rotation_matrix()[:3, :3]
174 |
175 |
176 | def random_se3():
177 | """
178 | :return: a random SE(3) matrix (for debugging)
179 | """
180 | r = random_so3()
181 | t = tr.random_vector(3)
182 | return se3(r, t)
--------------------------------------------------------------------------------
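A minimal round-trip sketch for the SO(3)/SE(3) helpers (assumes SciPy is installed, as listed in requirements.txt, and the repo root is on the Python path): build a rotation with so3_exp, recover its angle with so3_log, and check that the relative transform of a pose with itself is the identity.

import numpy as np
from mono.tools.lie_algebra import so3_exp, so3_log, is_so3, se3, relative_se3

axis = np.array([0.0, 0.0, 1.0])  # rotate about z
angle = 0.3
r = so3_exp(axis, angle)

print(is_so3(r))                      # True
print(np.isclose(so3_log(r), angle))  # angle-only logarithm recovers 0.3

p = se3(r, np.array([1.0, 2.0, 3.0]))
print(np.allclose(relative_se3(p, p), np.eye(4)))  # True
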
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | pandas
3 | matplotlib
4 | scikit-image
5 | scipy
6 | imageio
7 | tqdm
8 | cython
9 | mmcv==0.4.4
10 | torch>=1.1
11 | torchvision>=0.4.0
12 | pypng
--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | if __name__ == '__main__':
4 | # os.system('/home/user/software/anaconda/envs/py37t11/bin/python -m torch.distributed.launch --master_port=9900 --nproc_per_node=1 train.py')
5 | # os.system('/home/hadoop-wallemnl/cephfs/data/shuchang/envs/py37t11/bin/python -m torch.distributed.launch --master_port=9900 --nproc_per_node=8 train.py')
6 | os.system('/home/sconly/Documents/code/py37t11/bin/python -m torch.distributed.launch --master_port=9900 --nproc_per_node=1 train.py --config ./config/cfg_kitti_fm.py --work_dir /media/sconly/harddisk/weight/fmdepth')
7 |
--------------------------------------------------------------------------------
/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/scripts/__init__.py
--------------------------------------------------------------------------------
/scripts/draw_odometry.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | import os
3 | import sys
4 | import argparse
5 | import numpy as np
6 |
7 | import torch
8 | from torch.utils.data import DataLoader
9 |
10 | sys.path.append('.')
11 | sys.path.append('..')
12 | from mono.datasets.euroc_dataset import FolderDataset
13 | from mono.datasets.kitti_dataset import KITTIOdomDataset
14 | from mono.datasets.utils import readlines,transformation_from_parameters
15 | from mono.model.mono_baseline.pose_encoder import PoseEncoder
16 | from mono.model.mono_baseline.pose_decoder import PoseDecoder
17 | from mono.tools.kitti_evaluation_toolkit import kittiOdomEval
18 |
19 |
20 | def odo(opt):
21 | if opt.kitti:
22 | filenames = readlines("../mono/datasets/splits/odom/test_files_{:02d}.txt".format(opt.sequence_id))
23 |
24 | dataset = KITTIOdomDataset(opt.data_path,
25 | filenames,
26 | opt.height,
27 | opt.width,
28 | [0, 1],
29 | is_train=False,
30 | img_ext='.png',
31 | gt_depth_path=None)
32 | else:
33 | dataset = FolderDataset(opt.data_path,
34 | None,
35 | opt.height,
36 | opt.width,
37 | [0, 1],
38 | is_train=False,
39 | img_ext='.png',
40 | gt_depth_path=None)
41 |
42 | dataloader = DataLoader(dataset,
43 | 1,
44 | shuffle=False,
45 | num_workers=4,
46 | pin_memory=True,
47 | drop_last=False)
48 |
49 | pose_encoder = PoseEncoder(18, None, 2)
50 | pose_decoder = PoseDecoder(pose_encoder.num_ch_enc)
51 |
52 | checkpoint = torch.load(opt.model_path)
53 | for name, param in pose_encoder.state_dict().items():
54 | pose_encoder.state_dict()[name].copy_(checkpoint['state_dict']['PoseEncoder.' + name])
55 | for name, param in pose_decoder.state_dict().items():
56 | pose_decoder.state_dict()[name].copy_(checkpoint['state_dict']['PoseDecoder.' + name])
57 | pose_encoder.cuda()
58 | pose_encoder.eval()
59 | pose_decoder.cuda()
60 | pose_decoder.eval()
61 |
62 | global_pose = np.identity(4)
63 | poses = [global_pose[0:3, :].reshape(1, 12)]
64 |
65 | with torch.no_grad():
66 | for batch_idx, inputs in enumerate(dataloader):
67 | for key, ipt in inputs.items():
68 | inputs[key] = ipt.cuda()
69 | all_color_aug = torch.cat([inputs[("color_aug", i, 0)] for i in [0,1]], 1)
70 | axisangle, translation = pose_decoder(pose_encoder(all_color_aug))
71 | g = transformation_from_parameters(axisangle[:, 0], translation[:, 0])
72 | backward_transform = g.squeeze().cpu().numpy()#the transformation from frame +1 to frame 0
73 | global_pose = global_pose @ np.linalg.inv(backward_transform)
74 | poses.append(global_pose[0:3, :].reshape(1, 12))
75 | poses = np.concatenate(poses, axis=0)
76 |
77 | if opt.kitti:
78 | filename = os.path.join(opt.result_dir, "{:02d}_pred.txt".format(opt.sequence_id))
79 | else:
80 | filename = os.path.join(opt.result_dir, "fm_ms_euroc_mh04_diff_3.txt")
81 |
82 | np.savetxt(filename, poses, delimiter=' ', fmt='%1.8e')
83 | if opt.kitti:
84 | opt.eva_seqs = '{:02d}_pred'.format(opt.sequence_id)
85 | pose_eval = kittiOdomEval(opt)
86 | pose_eval.eval(toCameraCoord=False) # set the value according to the predicted results
87 | print('saving into ', opt.result_dir)
88 |
89 |
90 | if __name__ == "__main__":
91 |     parser = argparse.ArgumentParser(description='Evaluate odometry')
92 | parser.add_argument('--model_path', default='/media/sconly/24eda5d5-e79b-423b-8dcc-8339a15f3219/weight/fm_depth_odom.pth', help='model save path')
93 | parser.add_argument('--data_path', default='/media/sconly/24eda5d5-e79b-423b-8dcc-8339a15f3219/data/kitti/Odometry', help='kitti odometry dataset path')
94 | parser.add_argument('--gt_dir', default='../mono/datasets/gt_pose',help='kitti odometry gt path')
95 | parser.add_argument('--result_dir', default='/media/sconly/24eda5d5-e79b-423b-8dcc-8339a15f3219/odom/')
96 | parser.add_argument('--height', default=192)
97 | parser.add_argument('--width', default=640)
98 | parser.add_argument('--kitti', default=True, help='whether test on the kitti odometry dataset')
99 | parser.add_argument('--sequence_id', default=9, help='which kitti odometry sequence for testing')
100 | opts = parser.parse_args()
101 | odo(opts)
102 | print("you can also run 'evo_traj kitti -s *.txt *.txt --ref=*.txt -p --plot_mode=xz' in terminal for visualization")
--------------------------------------------------------------------------------
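The trajectory in odo() is built by chaining per-pair relative transforms. A numpy-only sketch of that accumulation (the network call is replaced by a hand-written relative pose; per the comment in the script, each predicted transform maps frame +1 into frame 0, hence the inverse):

import numpy as np

def accumulate(global_pose, backward_transform):
    # Right-multiply by the inverse of the frame(+1) -> frame(0) transform.
    return global_pose @ np.linalg.inv(backward_transform)

global_pose = np.identity(4)
step = np.identity(4)
step[:3, 3] = [0.0, 0.0, -1.0]  # pretend each pair moves one metre

poses = [global_pose[0:3, :].reshape(1, 12)]
for _ in range(3):
    global_pose = accumulate(global_pose, step)
    poses.append(global_pose[0:3, :].reshape(1, 12))
print(np.concatenate(poses, axis=0).shape)  # (4, 12), the KITTI odometry pose format
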
/scripts/eval_depth.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | import cv2
3 | import sys
4 | import numpy as np
5 | from mmcv import Config
6 |
7 | import torch
8 | from torch.utils.data import DataLoader
9 |
10 | sys.path.append('.')
11 | from mono.model.registry import MONO
12 | from mono.model.mono_baseline.layers import disp_to_depth
13 | from mono.datasets.utils import readlines, compute_errors
14 | from mono.datasets.kitti_dataset import KITTIRAWDataset
15 |
16 | cv2.setNumThreads(0) # This speeds up evaluation 5x on our unix systems (OpenCV 3.3.1)
17 | STEREO_SCALE_FACTOR = 36
18 | MIN_DEPTH=1e-3
19 | MAX_DEPTH=80
20 |
21 |
22 | def evaluate(MODEL_PATH, CFG_PATH, GT_PATH):
23 | filenames = readlines("../mono/datasets/splits/exp/val_files.txt")
24 | cfg = Config.fromfile(CFG_PATH)
25 |
26 | dataset = KITTIRAWDataset(cfg.data['in_path'],
27 | filenames,
28 | cfg.data['height'],
29 | cfg.data['width'],
30 | [0],
31 | is_train=False,
32 | gt_depth_path=GT_PATH)
33 |
34 | dataloader = DataLoader(dataset,
35 | 1,
36 | shuffle=False,
37 | num_workers=4,
38 | pin_memory=True,
39 | drop_last=False)
40 |
41 | cfg.model['imgs_per_gpu'] = 1
42 | model = MONO.module_dict[cfg.model['name']](cfg.model)
43 | checkpoint = torch.load(MODEL_PATH)
44 | model.load_state_dict(checkpoint['state_dict'], strict=True)
45 | model.cuda()
46 | model.eval()
47 |
48 | pred_disps = []
49 | with torch.no_grad():
50 | for batch_idx, inputs in enumerate(dataloader):
51 | for key, ipt in inputs.items():
52 | inputs[key] = ipt.cuda()
53 | outputs = model(inputs)
54 |
55 | disp = outputs[("disp", 0, 0)]
56 |
57 | pred_disp, _ = disp_to_depth(disp, 0.1, 100)
58 | pred_disp = pred_disp.cpu()[:, 0].numpy()
59 | pred_disps.append(pred_disp)
60 | pred_disps = np.concatenate(pred_disps)
61 |
62 | gt_depths = np.load(GT_PATH, allow_pickle=True, fix_imports=True, encoding='latin1')["data"]
63 |
64 | print("-> Evaluating")
65 | if cfg.data['stereo_scale']:
66 | print('using baseline')
67 | else:
68 | print('using mean scaling')
69 |
70 | errors = []
71 | ratios = []
72 | for i in range(pred_disps.shape[0]):
73 | gt_depth = gt_depths[i]
74 | gt_height, gt_width = gt_depth.shape[:2]
75 |
76 | pred_disp = pred_disps[i]
77 | pred_disp = cv2.resize(pred_disp, (gt_width, gt_height))
78 |
79 | pred_depth = 1 / pred_disp
80 |
81 | mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)
82 | crop = np.array([0.40810811 * gt_height, 0.99189189 * gt_height,
83 | 0.03594771 * gt_width, 0.96405229 * gt_width]).astype(np.int32)
84 | crop_mask = np.zeros(mask.shape)
85 | crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
86 | mask = np.logical_and(mask, crop_mask)
87 |
88 | pred_depth = pred_depth[mask]
89 | gt_depth = gt_depth[mask]
90 |
91 | ratio = np.median(gt_depth) / np.median(pred_depth)
92 | ratios.append(ratio)
93 |
94 | if cfg.data['stereo_scale']:
95 | ratio = STEREO_SCALE_FACTOR
96 |
97 | pred_depth *= ratio
98 | pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH
99 | pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH
100 | errors.append(compute_errors(gt_depth, pred_depth))
101 |
102 | ratios = np.array(ratios)
103 | med = np.median(ratios)
104 | mean_errors = np.array(errors).mean(0)
105 | print("Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(med, np.std(ratios / med)))
106 | print("\n" + ("{:>}| " * 7).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3"))
107 | print(("&{:.3f} " * 7).format(*mean_errors.tolist()) + "\\\\")
108 | print("\n-> Done!")
109 |
110 |
111 | if __name__ == "__main__":
112 | CFG_PATH = '../config/cfg_kitti_fm.py'#path to cfg file
113 | GT_PATH = '/media/user/harddisk/data/kitti/kitti_raw/rawdata/gt_depths.npz'#path to kitti gt depth
114 | MODEL_PATH = '/media/user/harddisk/weight/fm_depth.pth'#path to model weights
115 | evaluate(MODEL_PATH, CFG_PATH, GT_PATH)
--------------------------------------------------------------------------------
/scripts/eval_depth_pp.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | import cv2
3 | import sys
4 | import numpy as np
5 | from mmcv import Config
6 |
7 | import torch
8 | from torch.utils.data import DataLoader
9 |
10 | sys.path.append('.')
11 | sys.path.append('..')
12 | from mono.model.registry import MONO
13 | from mono.model.mono_baseline.layers import disp_to_depth
14 | from mono.datasets.utils import readlines, compute_errors
15 | from mono.datasets.kitti_dataset import KITTIRAWDataset
16 |
17 | cv2.setNumThreads(0) # This speeds up evaluation 5x on our unix systems (OpenCV 3.3.1)
18 | STEREO_SCALE_FACTOR = 36
19 | MIN_DEPTH=1e-3
20 | MAX_DEPTH=80
21 |
22 | def batch_post_process_disparity(l_disp, r_disp):
23 | _, h, w = l_disp.shape
24 | m_disp = 0.5 * (l_disp + r_disp)
25 | l, _ = np.meshgrid(np.linspace(0, 1, w), np.linspace(0, 1, h))
26 | l_mask = (1.0 - np.clip(20 * (l - 0.05), 0, 1))[None, ...]
27 | r_mask = l_mask[:, :, ::-1]
28 | return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp
29 |
30 | def evaluate(MODEL_PATH, CFG_PATH, GT_PATH):
31 | filenames = readlines("../mono/datasets/splits/exp/val_files.txt")
32 | cfg = Config.fromfile(CFG_PATH)
33 |
34 | dataset = KITTIRAWDataset(cfg.data['in_path'],
35 | filenames,
36 | cfg.data['height'],
37 | cfg.data['width'],
38 | [0],
39 | is_train=False,
40 | gt_depth_path=None)
41 |
42 | dataloader = DataLoader(dataset,
43 | 2,
44 | shuffle=False,
45 | num_workers=1,
46 | pin_memory=True,
47 | drop_last=True)
48 |
49 | cfg.model['imgs_per_gpu'] = 2
50 | model = MONO.module_dict[cfg.model['name']](cfg.model)
51 | checkpoint = torch.load(MODEL_PATH)
52 | model.load_state_dict(checkpoint['state_dict'], strict=True)
53 | model.cuda()
54 | model.eval()
55 |
56 | pred_disps = []
57 | with torch.no_grad():
58 | for batch_idx, inputs in enumerate(dataloader):
59 | print(batch_idx)
60 | for key, ipt in inputs.items():
61 | inputs[key] = ipt.cuda()
62 |
63 | outputs = model(inputs)
64 |
65 | disp = outputs[("disp", 0, 0)]
66 | # N = pred_disp.shape[0] // 2
67 | # pred_disp = batch_post_process_disparity(pred_disp[:N], pred_disp[N:, :, ::-1])
68 | pred_disp, _ = disp_to_depth(disp, 0.1, 100)
69 | pred_disp = pred_disp.cpu()[:, 0].numpy()
70 | pred_disps.append(pred_disp)
71 | pred_disps = np.concatenate(pred_disps)
72 |
73 | gt_depths = np.load(GT_PATH, allow_pickle=True, fix_imports=True, encoding='latin1')["data"]
74 |
75 | print("-> Evaluating")
76 | if cfg.data['stereo_scale']:
77 | print('using baseline')
78 | else:
79 | print('using mean scaling')
80 |
81 | errors = []
82 | ratios = []
83 | for i in range(pred_disps.shape[0]):
84 | gt_depth = gt_depths[i]
85 | gt_height, gt_width = gt_depth.shape[:2]
86 |
87 | pred_disp = pred_disps[i]
88 | pred_disp = cv2.resize(pred_disp, (gt_width, gt_height))
89 |
90 | pred_depth = 1 / pred_disp
91 |
92 | mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)
93 | crop = np.array([0.40810811 * gt_height, 0.99189189 * gt_height,
94 | 0.03594771 * gt_width, 0.96405229 * gt_width]).astype(np.int32)
95 | crop_mask = np.zeros(mask.shape)
96 | crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
97 | mask = np.logical_and(mask, crop_mask)
98 |
99 | pred_depth = pred_depth[mask]
100 | gt_depth = gt_depth[mask]
101 |
102 | ratio = np.median(gt_depth) / np.median(pred_depth)
103 | ratios.append(ratio)
104 |
105 | if cfg.data['stereo_scale']:
106 | ratio = STEREO_SCALE_FACTOR
107 |
108 | pred_depth *= ratio
109 | pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH
110 | pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH
111 | errors.append(compute_errors(gt_depth, pred_depth))
112 |
113 | ratios = np.array(ratios)
114 | med = np.median(ratios)
115 | mean_errors = np.array(errors).mean(0)
116 | print("Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(med, np.std(ratios / med)))
117 | print("\n" + ("{:>}| " * 7).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3"))
118 | print(("&{:.3f} " * 7).format(*mean_errors.tolist()) + "\\\\")
119 | print("\n-> Done!")
120 |
121 |
122 | if __name__ == "__main__":
123 | CFG_PATH = '../config/cfg_kitti_fm.py'#path to cfg file
124 | GT_PATH = '/media/sconly/harddisk/data/kitti/kitti_raw/rawdata/gt_depths.npz'#path to kitti gt depth
125 | MODEL_PATH = '/media/sconly/harddisk/weight/fm_depth.pth'#path to model weights
126 | evaluate(MODEL_PATH, CFG_PATH, GT_PATH)
--------------------------------------------------------------------------------
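batch_post_process_disparity is defined above but its call site is commented out. A sketch of its behaviour on dummy arrays (assumes the repo root is on the path and the requirements are installed so the script module can be imported): near the left border the output follows the flipped-image prediction, near the right border the direct prediction, with an average in between.

import numpy as np
from scripts.eval_depth_pp import batch_post_process_disparity

h, w = 4, 8
l_disp = np.full((1, h, w), 0.5, dtype=np.float32)  # prediction on the original image
r_disp = np.full((1, h, w), 0.7, dtype=np.float32)  # prediction on the flipped image, flipped back

out = batch_post_process_disparity(l_disp, r_disp)
print(out.shape)                    # (1, 4, 8)
print(out[0, 0, 0], out[0, 0, -1])  # 0.7 at the left edge, 0.5 at the right edge
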
/scripts/eval_pose.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | import os
3 | import sys
4 | import numpy as np
5 |
6 | import torch
7 | from torch.utils.data import DataLoader
8 |
9 | sys.path.append('.')
10 | from mono.datasets.utils import readlines, dump_xyz, compute_ate, transformation_from_parameters
11 | from mono.datasets.kitti_dataset import KITTIOdomDataset
12 | from mono.model.mono_fm.pose_encoder import PoseEncoder
13 | from mono.model.mono_fm.pose_decoder import PoseDecoder
14 |
15 |
16 |
17 |
18 |
19 | def evaluate(data_path,model_path,sequence_id,height,width):
20 | filenames = readlines("../mono/datasets/splits/odom/test_files_{:02d}.txt".format(sequence_id))
21 |
22 | dataset = KITTIOdomDataset(data_path,
23 | filenames,
24 | height,
25 | width,
26 | [0, 1],
27 | is_train=False,
28 | img_ext='.png',
29 | gt_depth_path=None)
30 |
31 | dataloader = DataLoader(dataset,
32 | 1,
33 | shuffle=False,
34 | num_workers=4,
35 | pin_memory=True,
36 | drop_last=False)
37 |
38 |
39 | pose_encoder = PoseEncoder(18, None, 2)
40 | pose_decoder = PoseDecoder(pose_encoder.num_ch_enc)
41 |
42 | checkpoint = torch.load(model_path)
43 | for name, param in pose_encoder.state_dict().items():
44 | pose_encoder.state_dict()[name].copy_(checkpoint['state_dict']['PoseEncoder.' + name])
45 | for name, param in pose_decoder.state_dict().items():
46 | pose_decoder.state_dict()[name].copy_(checkpoint['state_dict']['PoseDecoder.' + name])
47 | pose_encoder.cuda()
48 | pose_encoder.eval()
49 | pose_decoder.cuda()
50 | pose_decoder.eval()
51 |
52 | pred_poses = []
53 |
54 | print("-> Computing pose predictions")
55 | with torch.no_grad():
56 | for inputs in dataloader:
57 | for key, ipt in inputs.items():
58 | inputs[key] = ipt.cuda()
59 | all_color_aug = torch.cat([inputs[("color_aug", i, 0)] for i in [0, 1]], 1)
60 | features = pose_encoder(all_color_aug)
61 | axisangle, translation = pose_decoder(features)
62 | pred_poses.append(transformation_from_parameters(axisangle[:, 0], translation[:, 0]).cpu().numpy())
63 | pred_poses = np.concatenate(pred_poses)
64 |
65 | gt_poses_path = os.path.join(data_path, "poses", "{:02d}.txt".format(sequence_id))
66 | gt_global_poses = np.loadtxt(gt_poses_path).reshape(-1, 3, 4)
67 | gt_global_poses = np.concatenate((gt_global_poses, np.zeros((gt_global_poses.shape[0], 1, 4))), 1)
68 | gt_global_poses[:, 3, 3] = 1
69 | gt_xyzs = gt_global_poses[:, :3, 3]
70 | gt_local_poses = []
71 | for i in range(1, len(gt_global_poses)):
72 | gt_local_poses.append(np.linalg.inv(np.dot(np.linalg.inv(gt_global_poses[i - 1]), gt_global_poses[i])))
73 |
74 | ates = []
75 | num_frames = gt_xyzs.shape[0]
76 | track_length = 5
77 | for i in range(0, num_frames - 1):
78 | local_xyzs = np.array(dump_xyz(pred_poses[i:i + track_length - 1]))
79 | gt_local_xyzs = np.array(dump_xyz(gt_local_poses[i:i + track_length - 1]))
80 | ates.append(compute_ate(gt_local_xyzs, local_xyzs))
81 |
82 | print("\n odom_{} Trajectory error: {:0.3f}, std: {:0.3f}\n".format(sequence_id, np.mean(ates), np.std(ates)))
83 |
84 | # save_path = os.path.join(load_weights_folder, "poses.npy")
85 | # np.save(save_path, pred_poses)
86 | # print("-> Predictions saved to", save_path)
87 |
88 |
89 | if __name__ == "__main__":
90 | data_path='/media/user/harddisk/data/kitti/Odometry/dataset'#path to kitti odometry
91 | model_path = '/media/user/harddisk/weight/fm_depth.pth'
92 | height=320
93 | width=1024
94 | sequence_id =9
95 | evaluate(data_path,model_path,sequence_id,height,width)
96 | sequence_id = 10
97 | evaluate(data_path,model_path,sequence_id,height,width)
98 |
--------------------------------------------------------------------------------
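The ground-truth handling in evaluate() converts absolute KITTI poses into frame-to-frame transforms before comparing trajectories. A small numpy sketch of that step with synthetic poses (no dataset needed); the final inversion mirrors the convention used for pred_poses above.

import numpy as np

def make_pose(translation):
    p = np.eye(4)
    p[:3, 3] = translation
    return p

g0 = make_pose([0.0, 0.0, 0.0])  # global pose of frame i-1
g1 = make_pose([0.0, 0.0, 1.2])  # global pose of frame i

local = np.linalg.inv(np.dot(np.linalg.inv(g0), g1))  # same expression as in evaluate()
print(local[:3, 3])  # [ 0.   0.  -1.2]
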
/scripts/infer.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | import os
3 | import cv2
4 | import sys
5 | import matplotlib.pyplot as plt
6 | import numpy as np
7 | from mmcv import Config
8 |
9 | import torch
10 | from torch.utils.data import DataLoader
11 |
12 | sys.path.append('.')
13 | sys.path.append('..')
14 | from mono.model.registry import MONO
15 | from mono.model.mono_baseline.layers import disp_to_depth
16 | from mono.datasets.utils import readlines
17 | from mono.datasets.kitti_dataset import KITTIRAWDataset
18 |
19 | cv2.setNumThreads(0) # This speeds up evaluation 5x on our unix systems (OpenCV 3.3.1)
20 |
21 | MIN_DEPTH=1e-3
22 | MAX_DEPTH=80
23 | SCALE = 36  # we set baseline=0.015m, which is 36 times smaller than the actual value (0.54m)
24 |
25 | def transform(cv2_img, height=320, width=1024):
26 | im_tensor = torch.from_numpy(cv2_img.astype(np.float32)).cuda().unsqueeze(0)
27 | im_tensor = im_tensor.permute(0, 3, 1, 2).contiguous()
28 | im_tensor = torch.nn.functional.interpolate(im_tensor, [height, width],mode='bilinear', align_corners=False)
29 | im_tensor /= 255
30 | return im_tensor
31 |
32 | def predict(cv2_img, model):
33 | original_height, original_width = cv2_img.shape[:2]
34 | im_tensor = transform(cv2_img)
35 |
36 | with torch.no_grad():
37 | input = {}
38 | input['color_aug', 0, 0] = im_tensor
39 | outputs = model(input)
40 |
41 | disp = outputs[("disp", 0, 0)]
42 | disp_resized = torch.nn.functional.interpolate(disp, (original_height, original_width), mode="bilinear", align_corners=False)
43 | min_disp = 1/MAX_DEPTH
44 | max_disp = 1/MIN_DEPTH
45 | depth = 1/(disp_resized.squeeze().cpu().numpy()*max_disp + min_disp) * SCALE
46 | return depth, disp_resized.squeeze().cpu().numpy()
47 |
48 | def evaluate(cfg_path, model_path, img_path, output_path):
49 | cfg = Config.fromfile(cfg_path)
50 | cfg['model']['depth_pretrained_path'] = None
51 | cfg['model']['pose_pretrained_path'] = None
52 | cfg['model']['extractor_pretrained_path'] = None
53 | model = MONO.module_dict[cfg.model['name']](cfg.model)
54 | checkpoint = torch.load(model_path)
55 | model.load_state_dict(checkpoint['state_dict'], strict=True)
56 | model.cuda()
57 | model.eval()
58 |
59 | with torch.no_grad():
60 | cv2_img = cv2.imread(img_path)
61 | cv2_img = cv2.cvtColor(cv2_img, cv2.COLOR_BGR2RGB)
62 |
63 | depth, disp_resized = predict(cv2_img, model)
64 |
65 | vmax = np.percentile(disp_resized, 95)
66 | plt.imsave(output_path, disp_resized, cmap='magma', vmax=vmax)
67 |
68 | print("\n-> Done!")
69 |
70 |
71 | if __name__ == "__main__":
72 | cfg_path = '../config/cfg_kitti_fm.py'# path to cfg file
73 | model_path = '/media/sconly/harddisk/weight/fm_depth.pth'# path to model weight
74 | img_path = '../assets/test.png'
75 |     output_path = '../assets/test_disp.png'  # path for saving the disparity map
76 | evaluate(cfg_path, model_path, img_path, output_path)
--------------------------------------------------------------------------------
/scripts/infer_singleimage.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | import os
3 | import cv2
4 | import sys
5 | import matplotlib.pyplot as plt
6 | import numpy as np
7 | from mmcv import Config
8 |
9 | import torch
10 | from torch.utils.data import DataLoader
11 |
12 | sys.path.append('.')
13 | from mono.model.registry import MONO
14 | from mono.model.mono_baseline.layers import disp_to_depth
15 | from mono.datasets.utils import readlines
16 | from mono.datasets.kitti_dataset import KITTIRAWDataset
17 |
18 | cv2.setNumThreads(0) # This speeds up evaluation 5x on our unix systems (OpenCV 3.3.1)
19 |
20 |
21 |
22 | def evaluate(cfg_path,model_path,gt_path, output_path):
23 | filenames = readlines("../mono/datasets/splits/exp/val_files.txt")
24 | cfg = Config.fromfile(cfg_path)
25 |
26 | dataset = KITTIRAWDataset(cfg.data['in_path'],
27 | filenames,
28 | cfg.data['height'],
29 | cfg.data['width'],
30 | [0],
31 | is_train=False,
32 | gt_depth_path=gt_path)
33 |
34 | dataloader = DataLoader(dataset,
35 | 1,
36 | shuffle=False,
37 | num_workers=4,
38 | pin_memory=True,
39 | drop_last=False)
40 |
41 | cfg.model['imgs_per_gpu'] = 1
42 | model = MONO.module_dict[cfg.model['name']](cfg.model)
43 | checkpoint = torch.load(model_path)
44 | model.load_state_dict(checkpoint['state_dict'], strict=False)
45 | model.cuda()
46 | model.eval()
47 |
48 | with torch.no_grad():
49 | for batch_idx, inputs in enumerate(dataloader):
50 | for key, ipt in inputs.items():
51 | inputs[key] = ipt.cuda()
52 | outputs = model(inputs)
53 |
54 | img_path = os.path.join(output_path, 'img_{:0>4d}.jpg'.format(batch_idx))
55 | plt.imsave(img_path, inputs[("color", 0, 0)][0].squeeze().transpose(0,1).transpose(1,2).cpu().numpy())
56 |
57 | disp = outputs[("disp", 0, 0)]
58 | pred_disp, _ = disp_to_depth(disp, 0.1, 100)
59 | pred_disp = pred_disp[0, 0].cpu().numpy()
60 | pred_disp = cv2.resize(pred_disp, (cfg.data['width'], cfg.data['height']))
61 |
62 | img_path = os.path.join(output_path, 'disp_{:0>4d}.jpg'.format(batch_idx))
63 | vmax = np.percentile(pred_disp, 95)
64 | plt.imsave(img_path, pred_disp, cmap='magma', vmax=vmax)
65 |
66 | print("\n-> Done!")
67 |
68 |
69 | if __name__ == "__main__":
70 | cfg_path = '../config/cfg_kitti_fm.py'# path to cfg file
71 | model_path = '/media/user/harddisk/weight/fm_depth.pth'# path to model weight
72 | gt_path = '/media/user/harddisk/data/kitti/kitti_raw/rawdata/gt_depths.npz' # path to kitti gt depth
73 | output_path = '/media/user/harddisk/results' # dir for saving depth maps
74 | if not os.path.exists(output_path):
75 | os.mkdir(output_path)
76 | evaluate(cfg_path,model_path,gt_path,output_path)
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | import argparse
4 | from mmcv import Config
5 | from mmcv.runner import load_checkpoint
6 |
7 | from mono.datasets.get_dataset import get_dataset
8 | from mono.apis import (train_mono,
9 | init_dist,
10 | get_root_logger,
11 | set_random_seed)
12 | from mono.model.registry import MONO
13 | import torch
14 |
15 |
16 | def parse_args():
17 |     parser = argparse.ArgumentParser(description='Train a depth estimation network')
18 | parser.add_argument('--config',
19 | default='/home/user/Documents/code/fm_depth/config/cfg_kitti_fm_joint.py',
20 | help='train config file path')
21 | parser.add_argument('--work_dir',
22 | default='/media/user/harddisk/weight/fmdepth',
23 | help='the dir to save logs and models')
24 | parser.add_argument('--resume_from',
25 | help='the checkpoint file to resume from')
26 | parser.add_argument('--gpus',
27 | default='0',
28 | type=str,
29 |                         help='gpu ids to use, comma separated '
30 | '(only applicable to non-distributed training)')
31 | parser.add_argument('--seed',
32 | type=int,
33 | default=1024,
34 | help='random seed')
35 | parser.add_argument('--launcher',
36 | choices=['none', 'pytorch', 'slurm', 'mpi'],
37 | default='pytorch',
38 | help='job launcher')
39 | parser.add_argument('--local_rank',
40 | type=int,
41 | default=0)
42 | args = parser.parse_args()
43 | return args
44 |
45 |
46 | def main():
47 | args = parse_args()
48 | print(args.config)
49 | cfg = Config.fromfile(args.config)
50 | cfg.work_dir = args.work_dir
51 |
52 | # set cudnn_benchmark
53 | if cfg.get('cudnn_benchmark', False):
54 | torch.backends.cudnn.benchmark = True
55 |
56 | if args.resume_from is not None:
57 | cfg.resume_from = args.resume_from
58 | cfg.gpus = [int(_) for _ in args.gpus.split(',')]
59 |
60 | # init distributed env first, since logger depends on the dist info.
61 | if args.launcher == 'none':
62 | distributed = False
63 | else:
64 | distributed = True
65 | init_dist(args.launcher, **cfg.dist_params)
66 |
67 | print('cfg is ', cfg)
68 | # init logger before other steps
69 | logger = get_root_logger(cfg.log_level)
70 | logger.info('Distributed training: {}'.format(distributed))
71 |
72 | # set random seeds
73 | if args.seed is not None:
74 | logger.info('Set random seed to {}'.format(args.seed))
75 | set_random_seed(args.seed)
76 |
77 | model_name = cfg.model['name']
78 | model = MONO.module_dict[model_name](cfg.model)
79 |
80 | if cfg.resume_from is not None:
81 | load_checkpoint(model, cfg.resume_from, map_location='cpu')
82 | elif cfg.finetune is not None:
83 | print('loading from', cfg.finetune)
84 | checkpoint = torch.load(cfg.finetune, map_location='cpu')
85 | model.load_state_dict(checkpoint['state_dict'], strict=False)
86 |
87 | train_dataset = get_dataset(cfg.data, training=True)
88 | if cfg.validate:
89 | val_dataset = get_dataset(cfg.data, training=False)
90 | else:
91 | val_dataset = None
92 |
93 | train_mono(model,
94 | train_dataset,
95 | val_dataset,
96 | cfg,
97 | distributed=distributed,
98 | validate=cfg.validate,
99 | logger=logger)
100 |
101 |
102 | if __name__ == '__main__':
103 | main()
--------------------------------------------------------------------------------