├── .gitignore ├── DATA.md ├── GETTING_STARTED.md ├── INSTALL.md ├── INSTALL.sh ├── LICENSE ├── README.md ├── configs ├── AcfNet │ ├── ResultOfAcfNet.md │ ├── kitti_2015_adaptive.py │ ├── kitti_2015_uniform.py │ ├── scene_flow_adaptive.py │ └── scene_flow_uniform.py ├── AnyNet │ ├── ResultOfAnyNet.md │ └── scene_flow.py ├── DeepPruner │ ├── ResultOfDeepPruner.md │ ├── scene_flow_4x.py │ └── scene_flow_8x.py ├── GCNet │ ├── kitti_2015.py │ └── scene_flow.py ├── PSMNet │ ├── ResultOfPSMNet.md │ ├── kitti_2015.py │ └── scene_flow.py └── StereoNet │ ├── ResultOfStereoNet.md │ ├── scene_flow_8x_2stage.py │ └── scene_flow_8x_4stage.py ├── dmb ├── __init__.py ├── apis │ ├── __init__.py │ ├── inference.py │ └── train.py ├── data │ ├── README.md │ ├── __init__.py │ ├── datasets │ │ ├── __init__.py │ │ ├── evaluation │ │ │ ├── __init__.py │ │ │ ├── flow │ │ │ │ ├── __init__.py │ │ │ │ ├── eval.py │ │ │ │ ├── eval_hooks.py │ │ │ │ └── pixel_error.py │ │ │ └── stereo │ │ │ │ ├── __init__.py │ │ │ │ ├── eval.py │ │ │ │ ├── eval_hooks.py │ │ │ │ └── pixel_error.py │ │ ├── flow │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── builder.py │ │ │ └── flying_chairs │ │ │ │ ├── __init__.py │ │ │ │ └── base.py │ │ ├── stereo │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── builder.py │ │ │ ├── kitti │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── kitti_2012.py │ │ │ │ └── kitti_2015.py │ │ │ └── scene_flow │ │ │ │ ├── __init__.py │ │ │ │ └── base.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── load_disp.py │ │ │ └── load_flow.py │ ├── loaders │ │ ├── __init__.py │ │ ├── builder.py │ │ └── samplers.py │ └── transforms │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── flow_trans.py │ │ ├── stereo_trans.py │ │ └── transforms.py ├── modeling │ ├── __init__.py │ ├── flow │ │ ├── __init__.py │ │ └── models │ │ │ └── __init__.py │ └── stereo │ │ ├── __init__.py │ │ ├── backbones │ │ ├── AnyNet.py │ │ ├── DeepPruner.py │ │ ├── GCNet.py │ │ ├── PSMNet.py │ │ ├── StereoNet.py │ │ ├── __init__.py │ │ ├── backbones.py │ │ └── utils │ │ │ ├── DenseASPP.py │ │ │ └── __init__.py │ │ ├── cmn │ │ ├── __init__.py │ │ ├── cmn.py │ │ └── loss.py │ │ ├── conf_measure │ │ ├── __init__.py │ │ ├── cal_conf.py │ │ ├── conf_net.py │ │ └── gen_conf.py │ │ ├── cost_processors │ │ ├── AnyNet.py │ │ ├── DeepPruner.py │ │ ├── __init__.py │ │ ├── aggregators │ │ │ ├── AcfNet.py │ │ │ ├── AnyNet.py │ │ │ ├── DeepPruner.py │ │ │ ├── GCNet.py │ │ │ ├── PSMNet.py │ │ │ ├── StereoNet.py │ │ │ ├── __init__.py │ │ │ └── builder.py │ │ ├── builder.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── cat_fms.py │ │ │ ├── correlation1d_cost.py │ │ │ ├── cost_norm.py │ │ │ ├── dif_fms.py │ │ │ ├── hourglass.py │ │ │ ├── hourglass_2d.py │ │ │ └── hw_hourglass.py │ │ ├── disp_predictors │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── faster_soft_argmin.py │ │ ├── local_soft_argmin.py │ │ └── soft_argmin.py │ │ ├── disp_refinement │ │ ├── AnyNet.py │ │ ├── DeepPruner.py │ │ ├── StereoNet.py │ │ ├── __init__.py │ │ ├── builder.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── edge_aware.py │ │ │ └── min_warp_error.py │ │ ├── disp_samplers │ │ ├── DeepPruner.py │ │ ├── __init__.py │ │ ├── builder.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ └── patch_match.py │ │ ├── layers │ │ ├── __init__.py │ │ ├── basic_layers.py │ │ ├── bilateral_filter.py │ │ ├── cspn.py │ │ ├── dilated_hourglass.py │ │ ├── inverse_warp.py │ │ └── inverse_warp_3d.py │ │ ├── losses │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── conf_nll_loss.py │ │ ├── gerf_loss.py │ │ ├── 
inverse_warp_loss.py │ │ ├── relative_loss.py │ │ ├── smooth_l1_loss.py │ │ ├── stereo_focal_loss.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── disp2prob.py │ │ │ ├── quantile_loss.py │ │ │ └── ssim.py │ │ ├── models │ │ ├── AnyNet.py │ │ ├── DeepPruner.py │ │ ├── __init__.py │ │ └── general_stereo_model.py │ │ └── registry.py ├── ops │ ├── __init__.py │ └── spn │ │ ├── __init__.py │ │ ├── functions │ │ ├── __init__.py │ │ └── gaterecurrent2dnoind.py │ │ ├── gaterecurrent2dnoind_cuda.egg-info │ │ ├── PKG-INFO │ │ ├── SOURCES.txt │ │ ├── dependency_links.txt │ │ └── top_level.txt │ │ ├── modules │ │ ├── __init__.py │ │ └── gaterecurrent2dnoind.py │ │ ├── setup.py │ │ └── src │ │ ├── gaterecurrent2dnoind_cuda.cpp │ │ ├── gaterecurrent2dnoind_cuda.h │ │ ├── gaterecurrent2dnoind_kernel.cu │ │ └── gaterecurrent2dnoind_kernel.h ├── utils │ ├── __init__.py │ ├── collect_env.py │ ├── dist_utils.py │ ├── env.py │ ├── fp16 │ │ ├── __init__.py │ │ ├── decorators.py │ │ ├── hooks.py │ │ └── utils.py │ ├── registry.py │ ├── solver.py │ ├── tensorboard_logger.py │ └── text_logger.py └── visualization │ ├── __init__.py │ ├── flow │ ├── __init__.py │ ├── save_result.py │ ├── show_result.py │ ├── vis.py │ └── vis_hooks.py │ └── stereo │ ├── __init__.py │ ├── save_result.py │ ├── show_result.py │ ├── sparsification_plot.py │ ├── vis.py │ └── vis_hooks.py ├── requirements.txt ├── setup.py ├── tests ├── data │ └── datasets │ │ ├── flow │ │ ├── test_flying_chairs.py │ │ └── yaml_to_json.py │ │ └── stereo │ │ ├── test_kitti.py │ │ └── test_scene_flow.py └── modeling │ └── stereo │ ├── backbones │ └── test_backbones.py │ ├── cost_processors │ └── utils │ │ └── test_cat_fms.py │ ├── disp_predictors │ └── test_disp_predictors.py │ ├── losses │ ├── test_stereo_focal_loss.py │ └── utils │ │ └── test_disp2prob.py │ └── models │ └── test_model.py └── tools ├── UI.py ├── UI.ui ├── datasets ├── gen_kitti2015_anns.py └── gen_sceneflow_anns.py ├── demo.py ├── demo.sh ├── demo_data ├── disparity │ ├── left │ │ ├── 0.pfm │ │ └── 4.pfm │ └── right │ │ ├── 0.pfm │ │ └── 4.pfm └── images │ ├── left │ ├── 0.png │ └── 4.png │ └── right │ ├── 0.png │ └── 4.png ├── dist_test.sh ├── dist_train.sh ├── test.py ├── train.py └── view_cost.py /.gitignore: -------------------------------------------------------------------------------- 1 | # compilation and distribution 2 | __pycache__ 3 | _ext 4 | *.pyc 5 | *.so 6 | dmb.egg-info/ 7 | build/ 8 | dist/ 9 | 10 | # pytorch/python/numpy formats 11 | *.pth 12 | *.pkl 13 | *.npy 14 | 15 | # ipython/jupyter notebooks 16 | *.ipynb 17 | **/.ipynb_checkpoints/ 18 | 19 | # Editor temporaries 20 | *.swn 21 | *.swo 22 | *.swp 23 | *~ 24 | 25 | # Pycharm editor settings 26 | .idea 27 | 28 | # vscode editor settings 29 | .vscode 30 | 31 | # MacOS 32 | .DS_Store 33 | -------------------------------------------------------------------------------- /DATA.md: -------------------------------------------------------------------------------- 1 | ### Prepare Scene Flow and KITTI dataset. 2 | 3 | It is recommended to symlink the dataset root to `$DenseMatchingBenchmark/datasets/`. 
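For example, assuming an existing dataset copy lives under `/data` (this source path is illustrative; substitute your own):

```bash
# link existing dataset folders into the repository's dataset root
mkdir -p $DenseMatchingBenchmark/datasets
ln -s /data/SceneFlow $DenseMatchingBenchmark/datasets/SceneFlow
ln -s /data/KITTI-2015 $DenseMatchingBenchmark/datasets/KITTI-2015
```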
Related tools for generating the JSON annotation files can be found in [tools](tools/datasets). 4 | 5 | ``` 6 | ├── KITTI-2012 7 | │   └── data_stereo_flow 8 | │   ├── testing 9 | │   └── training 10 | ├── KITTI-2015 11 | │   ├── calib 12 | │   ├── devkit 13 | │   ├── testing 14 | │   │   ├── image_2 15 | │   │   └── image_3 16 | │   └── training 17 | │   ├── disp_noc_0 18 | │   ├── disp_noc_1 19 | │   ├── image_2 20 | │   └── image_3 21 | └── SceneFlow 22 |   ├── calib 23 |    ├── driving 24 |    │   ├── disparity 25 |    │   ├── frames_cleanpass 26 |    │   └── frames_finalpass 27 |    ├── flyingthings3d 28 |    │   ├── disparity 29 |    │   ├── frames_cleanpass 30 |    │   └── frames_finalpass 31 |    └── Monkaa 32 |    ├── disparity 33 |    ├── frames_cleanpass 34 |    └── frames_finalpass 35 | 36 | 37 | ``` 38 | 39 | ### Prepare visualization dataset. 40 | 41 | We enable evaluation and visualization for each epoch. Here, visualization means rendering the estimated results (e.g., disparity maps) so they can be inspected. 42 | 43 | It is recommended to download the visualization data we prepared; alternatively, you can prepare it yourself. 44 | 45 | #### How To Use 46 | 47 | To use it, simply set the `data=dict(vis=...)` parameter in the config file to a valid path. 48 | 49 | #### Download Links 50 | The visualization data can be downloaded from: 51 | 1. Baidu YunPan: https://pan.baidu.com/s/1J7OBum7-kTFQV3Sbr3qT4w password: 0q8y 52 | 2. Google Drive: https://drive.google.com/open?id=1oroPkS9bYBULvRW2olpA2wLgKSxU9Ovl 53 | 54 | 55 | ``` 56 | visualization_data 57 | ├── KITTI-2015 58 | │ ├── annotations 59 | │ ├── calib 60 | │ ├── disparity 61 | │ ├── genVisKITTI2015AnnoFile.py 62 | │ ├── genVisKITTIVOAnnoFile.py 63 | │ ├── images 64 | │ └── velodyne_points 65 | └── SceneFlow 66 | ├── __init__.py 67 | ├── annotations 68 | ├── disparity 69 | ├── genVisSFAnnoFile.py 70 | ├── images 71 | ├── occ 72 | └── readme.txt 73 | ``` 74 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 3 | ### Requirements: 4 | - PyTorch 1.7.1+, Python 3.8+, CUDA 11.0+ 5 | - mmcv==1.3.5 6 | 7 | We have verified that the code can train/val/test normally with PyTorch 1.7.1. However, with limited GPU resources, the experiments have not been re-run. 8 | 9 | So, if you observe worse results, you can check out commit 177c56ca1952f54d28e6073afa2c16981113a2af: 10 | ```bash 11 | cd $DenseMatchingBenchmark 12 | git checkout 177c56ca1952f54d28e6073afa2c16981113a2af 13 | ``` 14 | 15 | ### Option 1: Step-by-step installation 16 | 17 | a. Create a conda virtual environment and activate it. Then install the required packages. 18 | 19 | 20 | ```bash 21 | # first, make sure that your conda is set up properly with the right environment 22 | # for that, check that `which conda`, `which pip` and `which python` point to the 23 | # right paths. From a clean conda env, this is what you need to do 24 | 25 | conda create --name dense_matching_benchmark python=3.8 26 | conda activate dense_matching_benchmark 27 | 28 | # this installs the right pip and dependencies for the fresh python 29 | conda install ipython 30 | conda install pip 31 | 32 | # install required packages from requirements.txt 33 | pip install -r requirements.txt 34 | ``` 35 | 36 | b. Install PyTorch stable or nightly and torchvision following the [official instructions](https://pytorch.org/). 37 | 38 | c0.
Install detectron2 39 | ```bash 40 | python -m pip install detectron2 -f \ 41 | https://dl.fbaipublicfiles.com/detectron2/wheels/cu110/torch1.7/index.html 42 | ``` 43 | 44 | c1. Install apex 45 | ```bash 46 | # optional step: 47 | export CUDA_HOME=/usr/local/cuda-x.x/ 48 | # where x.x corresponds to the CUDA version used to install pytorch 49 | 50 | git clone https://github.com/NVIDIA/apex.git 51 | cd apex 52 | python setup.py install --cuda_ext --cpp_ext 53 | ``` 54 | *Potential ISSUE*: [compile error](https://github.com/NVIDIA/apex/issues/802#issuecomment-618699214) 55 | ``` 56 | # optional step: 57 | git checkout f3a960f80244cf9e80558ab30f7f7e8cbf03c0a0 58 | ``` 59 | 60 | c2. Install [spatial-correlation-sampler](https://github.com/ClementPinard/Pytorch-Correlation-extension) 61 | ```bash 62 | git clone https://github.com/ClementPinard/Pytorch-Correlation-extension 63 | cd Pytorch-Correlation-extension 64 | git checkout fix_1.7 65 | python setup.py install 66 | ``` 67 | 68 | 69 | d. Clone the DenseMatchingBenchmark repository. 70 | 71 | ```bash 72 | git clone https://github.com/DeepMotionAIResearch/DenseMatchingBenchmark.git 73 | cd DenseMatchingBenchmark 74 | ``` 75 | 76 | e. Install DenseMatchingBenchmark (other dependencies will be installed as needed). 77 | ```bash 78 | # libs include: dmb, spn 79 | 80 | # the $1 can be: 'all', 'dmb', 'spn' 81 | # => install all libs or a specific lib, e.g. dmb 82 | 83 | # the $2 can be: 'install' 84 | # => if 'install' is given, the libs will be installed into site-packages 85 | # => if not given, the libs will be installed with symbolic links, 86 | # => so that you can modify the files if you want, without needing to re-build 87 | 88 | bash INSTALL.sh $1 $2 89 | 90 | # recommended install instruction: 91 | 92 | bash INSTALL.sh all 93 | 94 | ``` 95 | 96 | ### Prepare data 97 | 98 | For data preparation, please refer to [DATA.md](DATA.md) 99 | 100 | 101 | 102 | ### Notice 103 | You can run `python(3) setup.py develop` or `pip install -e .` to install DenseMatchingBenchmark if you want to make modifications to it frequently. 104 | 105 | If there is more than one copy of DenseMatchingBenchmark on your machine, and you want to switch between them, 106 | please insert the following code into the main file 107 | ```python 108 | import os.path as osp 109 | import sys 110 | sys.path.insert(0, osp.join(osp.dirname(osp.abspath(__file__)), '../')) 111 | ``` 112 | or run the following command in the terminal from the corresponding folder. 113 | ```shell 114 | export PYTHONPATH=`pwd`:$PYTHONPATH 115 | ``` 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /INSTALL.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [[ $1 == 'spn' || $1 == 'all' ]] 4 | then 5 | 6 | cd dmb/ops/spn/ 7 | 8 | python setup.py clean 9 | rm -rf build 10 | rm -r dist 11 | rm -r *.egg-info 12 | 13 | if [[ $2 == 'install' ]] 14 | then 15 | python setup.py install 16 | else 17 | python setup.py build develop 18 | fi 19 | 20 | cd ../../../ 21 | 22 | echo "*********************************************************************" 23 | echo " SPN installed!"
24 | echo "*********************************************************************" 25 | 26 | fi 27 | 28 | 29 | if [[ $1 == 'dmb' || $1 == 'all' ]] 30 | then 31 | 32 | python setup.py clean 33 | rm -r build 34 | rm -r dist 35 | rm -r *.egg-info 36 | 37 | if [[ $2 == 'install' ]] 38 | then 39 | python setup.py install 40 | else 41 | python setup.py build develop 42 | fi 43 | 44 | echo "*********************************************************************" 45 | echo " dmb installed!" 46 | echo "*********************************************************************" 47 | 48 | fi 49 | 50 | 51 | echo "*********************************************************************" 52 | echo " Dense Matching Benchmark Installed!" 53 | echo "*********************************************************************" 54 | 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Facebook 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | 24 | -------------------------------------------------------------------------------- /configs/AnyNet/ResultOfAnyNet.md: -------------------------------------------------------------------------------- 1 | # Result of AnyNet 2 | 3 | ## Model Info 4 | 5 | ``` 6 | @article{wang2018anytime, 7 | title={Anytime Stereo Image Depth Estimation on Mobile Devices}, 8 | author={Wang, Yan and Lai, Zihang and Huang, Gao and Wang, Brian H. and Van Der Maaten, Laurens and Campbell, Mark and Weinberger, Kilian Q}, 9 | journal={arXiv preprint arXiv:1810.11408}, 10 | year={2018} 11 | } 12 | ``` 13 | 14 | * Note: Test on GTX1080Ti, with resolution 384x1248. 
15 | 16 | | Model Name | FLOPS | Parameters | FPS | Time(ms) | 17 | |:---------------------:|:---------:|:----------:|:----:|:--------:| 18 | | AnyNet | 1.476G | 46.987K | 19 | 20 | 21 | 22 | ## Experiment 23 | 24 | 25 | **hints** 26 | 27 | * batch size: n * m, i.e., n GPUs with m samples per GPU 28 | * pass: clean pass or final pass of the Scene Flow dataset, default clean pass 29 | * weight init: whether to initialize the convolution/bn layers when training from scratch, default no initialization 30 | * synced bn: whether to use synced bn provided by apex, default False 31 | * float16: whether to use mixed-precision training at level O1 provided by apex, default False 32 | * scale loss: the loss scale factor when using apex 33 | * time: total time consumed, including training and evaluation, in the format: x h(hour) y m(minute) 34 | * EPE: end-point error for SceneFlow 35 | * D1(all): 3PE(px) & 5% for KITTI 2015 36 | 37 | 38 | ### SceneFlow 39 | 40 | RMSProp, lr(20 epochs) schedule: 1-20 with lr\*1 41 | 42 | - Inference with 1 GPU takes a long time 43 | - Although we train for 20 epochs, we find that epoch 12 gives the best result 44 | 45 | | lr |batch size |weight init| synced bn | float16 |loss scale | EPE(px)| time | BaiDuYun | GoogleDrive | 46 | |:-----:|:---------:|:---------:|:---------:|:---------:|:---------:|:------:|:------:|:--------:|:-----------:| 47 | | 0.0005| 1*6 | ✗ | ✓ | ✗ | ✗ | 3.190 | 14h12m | [link][1], pw: dtff| [link][2] | 48 | 49 | 50 | 51 | ### KITTI-2015 52 | 53 | | lr |batch size |weight init| synced bn | float16 |loss scale | D1(all) | time | BaiDuYun | GoogleDrive | 54 | |:-----:|:---------:|:---------:|:---------:|:---------:|:---------:|:--------:|:------:|:--------:|:-----------:| 55 | | 0.001 | 1*6 | ✗ | ✓ | ✗ | ✗ | 56 | 57 | 58 | 59 | [1]: https://pan.baidu.com/s/10bP0TXCXHcdIg49Fv13H7Q 60 | [2]: https://drive.google.com/open?id=1_5hBOfKwg_TnMFvZr4qEkU0bEwRoRlxL 61 | -------------------------------------------------------------------------------- /configs/DeepPruner/ResultOfDeepPruner.md: -------------------------------------------------------------------------------- 1 | # Result of DeepPruner 2 | 3 | ## Model Info 4 | 5 | ``` 6 | @inproceedings{Duggal2019ICCV, 7 | title = {DeepPruner: Learning Efficient Stereo Matching via Differentiable PatchMatch}, 8 | author = {Shivam Duggal and Shenlong Wang and Wei-Chiu Ma and Rui Hu and Raquel Urtasun}, 9 | booktitle = {ICCV}, 10 | year = {2019} 11 | } 12 | ``` 13 | 14 | * Note: Test on GTX1080Ti, with resolution 384x1280.
15 | 16 | | Model Name | FLOPS | Parameters | FPS | Time(ms) | 17 | |:---------------------:|:---------:|:----------:|:----:|:--------:| 18 | | DeepPruner-4x | 472.125G | 7.390M | 3.42| 292.4 | 19 | | DeepPruner-8x | 194.181G | 7.470M | 7.67| 130.4 | 20 | 21 | 22 | 23 | ## Experiment 24 | 25 | 26 | **hints** 27 | 28 | * batch size: n * m, i.e., n GPUs with m samples per GPU 29 | * pass: clean pass or final pass of the Scene Flow dataset, default clean pass 30 | * weight init: whether to initialize the convolution/bn layers when training from scratch, default no initialization 31 | * synced bn: whether to use synced bn provided by apex, default False 32 | * float16: whether to use mixed-precision training at level O1 provided by apex, default False 33 | * scale loss: the loss scale factor when using apex 34 | * time: total time consumed, including training and evaluation, in the format: x h(hour) y m(minute) 35 | * EPE: end-point error for SceneFlow 36 | * D1(all): 3PE(px) & 5% for KITTI 2015 37 | 38 | 39 | ### SceneFlow 40 | 41 | RMSProp, lr(15 epochs) schedule: 1-10 with lr\*1 42 | 43 | 44 | | Model Name | lr |batch size |weight init| synced bn | float16 |loss scale | EPE(px)| time | BaiDuYun | GoogleDrive | 45 | |:---------------------:|:-----:|:---------:|:---------:|:---------:|:---------:|:---------:|:------:|:-----:|:--------:|:-----------:| 46 | | DeepPruner-4x | 0.001 | 4*2 | ✗ | ✓ | ✗ | ✗ | 47 | 48 | 49 | 50 | ### KITTI-2015 51 | 52 | | lr |batch size | synced bn |loss scale | 3PE(px) & 5% | 53 | |:-----:|:---------:|:---------:|:---------:|:------------:| 54 | | 0.001 | 4*3 | ✓ | ✗ | 55 | -------------------------------------------------------------------------------- /configs/PSMNet/ResultOfPSMNet.md: -------------------------------------------------------------------------------- 1 | # Result of PSMNet 2 | 3 | ``` 4 | @inproceedings{chang2018pyramid, 5 | title={Pyramid Stereo Matching Network}, 6 | author={Chang, Jia-Ren and Chen, Yong-Sheng}, 7 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 8 | pages={5410--5418}, 9 | year={2018} 10 | } 11 | ``` 12 | 13 | ## Model Info 14 | 15 | * Note: Test on GTX1080Ti, with resolution 384x1248.
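The FPS and Time(ms) entries in the table below come from repeated forward passes at the stated resolution. A minimal timing sketch is given here for reference; the warm-up/iteration counts and the assumption that the model consumes a left/right image pair on the GPU are ours, not the exact harness used for the table:

```python
import time
import torch

@torch.no_grad()
def measure_speed(model, iters=100, shape=(1, 3, 384, 1248)):
    # dummy stereo pair at the benchmark resolution
    left = torch.randn(shape, device='cuda')
    right = torch.randn(shape, device='cuda')
    model = model.cuda().eval()
    for _ in range(10):  # warm-up, lets cuDNN select kernels
        model(left, right)
    torch.cuda.synchronize()
    start = time.time()
    for _ in range(iters):
        model(left, right)
    torch.cuda.synchronize()  # wait for all kernels before reading the clock
    per_iter = (time.time() - start) / iters
    return 1.0 / per_iter, per_iter * 1000.0  # FPS, time in ms
```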
16 | 17 | | Model Name | FLOPS | Parameters | FPS | Time(ms) | 18 | |:---------------------:|:---------:|:----------:|:----:|:--------:| 19 | | PSMNet | 938.186G | 5.225M | 1.67 | 599.2 | 20 | 21 | 22 | 23 | ## Experiment 24 | 25 | 26 | **hints** 27 | 28 | * batch size: n * m, i.e., n GPUs with m samples per GPU 29 | * pass: clean pass or final pass of the Scene Flow dataset, default clean pass 30 | * weight init: whether to initialize the convolution/bn layers when training from scratch, default no initialization 31 | * synced bn: whether to use synced bn provided by apex, default False 32 | * float16: whether to use mixed-precision training at level O1 provided by apex, default False 33 | * scale loss: the loss scale factor when using apex 34 | * time: total time consumed, including training and evaluation, in the format: x h(hour) y m(minute) 35 | * EPE: end-point error for SceneFlow 36 | * D1(all): 3PE(px) & 5% for KITTI 2015 37 | 38 | 39 | ### SceneFlow 40 | 41 | RMSProp, lr(10 epochs) schedule: 1-10 with lr\*1 42 | 43 | 44 | | lr |batch size |weight init| synced bn | float16 |loss scale | EPE(px)| time | BaiDuYun | GoogleDrive | 45 | |:-----:|:---------:|:---------:|:---------:|:---------:|:---------:|:------:|:------:|:--------:|:-----------:| 46 | | 0.001 | 4*3 | ✗ | ✓ | ✗ | ✗ | 1.112 | 22h44m | [link][1], pw: 0kxt| [link][3] | 47 | 48 | 49 | 50 | ### KITTI-2015 51 | 52 | | lr |batch size |weight init| synced bn | float16 |loss scale | D1(all) | time | BaiDuYun | GoogleDrive | 53 | |:-----:|:---------:|:---------:|:---------:|:---------:|:---------:|:--------:|:------:|:--------:|:-----------:| 54 | | 0.001 | 4*3 | ✗ | ✓ | ✗ | ✗ | 2.33 | 15h15m | [link][2], pw: odt8| [link][4] | 55 | 56 | 57 | 58 | [1]: https://pan.baidu.com/s/1e693uEuNK6uAg3OZstDJVQ 59 | [2]: https://pan.baidu.com/s/1XnrtztXY9og3-JtBrLEGyA 60 | [3]: https://drive.google.com/open?id=1aPJiGkt9P2Lt0UCcM817YjONV2DRDEBH 61 | [4]: https://drive.google.com/drive/folders/1T__OTsViq5tkstm7jKV6p9wSs96EYUGw?usp=sharing 62 | -------------------------------------------------------------------------------- /configs/StereoNet/ResultOfStereoNet.md: -------------------------------------------------------------------------------- 1 | # Result of StereoNet 2 | 3 | ## Model Info 4 | 5 | ``` 6 | @inproceedings{khamis2018stereonet, 7 | title={Stereonet: Guided hierarchical refinement for real-time edge-aware depth prediction}, 8 | author={Khamis, Sameh and Fanello, Sean and Rhemann, Christoph and Kowdle, Adarsh and Valentin, Julien and Izadi, Shahram}, 9 | booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, 10 | pages={573--590}, 11 | year={2018} 12 | } 13 | 14 | @inproceedings{zhang2018activestereonet, 15 | title={Activestereonet: End-to-end self-supervised learning for active stereo systems}, 16 | author={Zhang, Yinda and Khamis, Sameh and Rhemann, Christoph and Valentin, Julien and Kowdle, Adarsh and Tankovich, Vladimir and Schoenberg, Michael and Izadi, Shahram and Funkhouser, Thomas and Fanello, Sean}, 17 | booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, 18 | pages={784--801}, 19 | year={2018} 20 | } 21 | 22 | ``` 23 | 24 | * Note: Test on GTX1080Ti, with resolution 384x1248.
25 | 26 | | Model Name | FLOPS | Parameters | FPS | Time(ms) | 27 | |:---------------------:|:---------:|:----------:|:----:|:--------:| 28 | | StereoNet-8X-2stage | 78.512G | 399.066K | 19.17| 52.2 | 29 | | StereoNet-8X-4stage | 186.719G | 624.860K | 8.54| 117.0 | 30 | 31 | 32 | 33 | ## Experiment 34 | 35 | 36 | **hints** 37 | 38 | * batch size: n * m, i.e., n GPUs with m samples per GPU 39 | * pass: clean pass or final pass of the Scene Flow dataset, default clean pass 40 | * weight init: whether to initialize the convolution/bn layers when training from scratch, default no initialization 41 | * synced bn: whether to use synced bn provided by apex, default False 42 | * float16: whether to use mixed-precision training at level O1 provided by apex, default False 43 | * scale loss: the loss scale factor when using apex 44 | * time: total time consumed, including training and evaluation, in the format: x h(hour) y m(minute) 45 | * EPE: end-point error for SceneFlow 46 | * D1(all): 3PE(px) & 5% for KITTI 2015 47 | 48 | 49 | ### SceneFlow 50 | 51 | RMSProp, lr(11 epochs) schedule: 1-11 with lr\*1 52 | 53 | - Inference with 1 GPU takes a long time 54 | 55 | | Model Name | lr |batch size |weight init| synced bn | float16 |loss scale | EPE(px)| time | BaiDuYun | GoogleDrive | 56 | |:---------------------:|:-----:|:---------:|:---------:|:---------:|:---------:|:---------:|:------:|:------:|:--------:|:-----------:| 57 | | StereoNet-8X-2stage | 0.001 | 1*4 | ✗ | ✓ | ✗ | ✗ | 1.533 | 40h56m |[link][1], pw: rza0 | [link][2]| 58 | | StereoNet-8X-4stage | 0.001 | 1*4 | ✗ | ✓ | ✗ | ✗ | 1.329 | 143h45m|[link][3], pw: gpjm | [link][4]| 59 | 60 | 61 | 62 | ### KITTI-2015 63 | 64 | | lr |batch size | synced bn |loss scale | 3PE(px) & 5% | 65 | |:-----:|:---------:|:---------:|:---------:|:------------:| 66 | | 0.001 | 1*4 | ✓ | ✗ | 67 | 68 | 69 | [1]: https://pan.baidu.com/s/1cuvjEETJUnpnxy_pFqiTRw 70 | [2]: https://drive.google.com/open?id=1cuXzQDfQ28a9gmSJichaIGYsEITGp_Qh 71 | [3]: https://pan.baidu.com/s/13DOhuuvqvNL9ksg5_85GEw 72 | [4]: https://drive.google.com/open?id=10TYF5SqN26-GsVIf2ytXALbNMBgOLH_1 73 | -------------------------------------------------------------------------------- /dmb/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/dmb/__init__.py -------------------------------------------------------------------------------- /dmb/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .train import train_matcher 2 | from .inference import inference_stereo 3 | -------------------------------------------------------------------------------- /dmb/data/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/dmb/data/README.md -------------------------------------------------------------------------------- /dmb/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .loaders import build_data_loader 2 | -------------------------------------------------------------------------------- /dmb/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .stereo import build_stereo_dataset 2 | from .stereo import SceneFlowDataset, Kitti2012Dataset, Kitti2015Dataset 3 |
4 | from .flow import build_flow_dataset 5 | from .flow import FlyingChairsDataset 6 | 7 | def build_dataset(cfg, type): 8 | task = cfg.get('task', 'stereo') 9 | if task == 'stereo': 10 | return build_stereo_dataset(cfg, type) 11 | elif task == 'flow': 12 | return build_flow_dataset(cfg, type) 13 | else: 14 | raise NotImplementedError 15 | -------------------------------------------------------------------------------- /dmb/data/datasets/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .flow import flow_output_evaluation_in_pandas 2 | from .stereo import disp_output_evaluation_in_pandas 3 | 4 | def output_evaluation_in_pandas(output_dict, task='stereo'): 5 | if task == 'stereo': 6 | return disp_output_evaluation_in_pandas(output_dict) 7 | elif task == 'flow': 8 | return flow_output_evaluation_in_pandas(output_dict) 9 | else: 10 | raise NotImplementedError 11 | 12 | 13 | from .flow import remove_padding as flow_remove_padding 14 | from .stereo import remove_padding as disp_remove_padding 15 | 16 | # NOTE: despite its name, this helper dispatches the task-specific remove_padding, not an error metric 17 | def calc_error(batch, size, task='stereo'): 18 | if task == 'stereo': 19 | return disp_remove_padding(batch, size) 20 | elif task == 'flow': 21 | return flow_remove_padding(batch, size) 22 | else: 23 | raise NotImplementedError 24 | -------------------------------------------------------------------------------- /dmb/data/datasets/evaluation/flow/__init__.py: -------------------------------------------------------------------------------- 1 | from .pixel_error import calc_error 2 | from .eval import do_evaluation, remove_padding 3 | from .eval_hooks import DistFlowEvalHook, flow_output_evaluation_in_pandas -------------------------------------------------------------------------------- /dmb/data/datasets/evaluation/flow/eval.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from collections import abc as container_abcs 3 | 4 | import torch 5 | 6 | from dmb.data.datasets.evaluation.flow.pixel_error import calc_error 7 | 8 | 9 | def remove_padding(batch, size): 10 | """ 11 | Usually, the SceneFlow image size is [540, 960], and we often pad it to [544, 960] for evaluation; 12 | for KITTI, the image size is padded to [384, 1248]. 13 | Here, we remove the padding from the estimated tensor, such as a flow map. 14 | Args: 15 | batch (torch.Tensor): in [BatchSize, Channel, Height, Width] layout 16 | size (list, tuple): the last two dimensions are the desired [Height, Width] 17 | """ 18 | error_msg = "batch must contain tensors, dicts or lists; found {}" 19 | if isinstance(batch, torch.Tensor): 20 | # Crop batch to the desired size 21 | # For flow, the image is padded on all sides, keeping the content centered 22 | assert batch.shape[-2] >= size[-2] and batch.shape[-1] >= size[-1] 23 | 24 | pad_top = (batch.shape[-2] - size[-2])//2 25 | pad_left = (batch.shape[-1] - size[-1])//2 26 | # crop an exact [Height, Width] window starting at the padding offsets 27 | batch = batch[:, :, pad_top:pad_top + size[-2], pad_left:pad_left + size[-1]] 28 | 29 | return batch 30 | elif isinstance(batch, container_abcs.Mapping): 31 | return {key: remove_padding(batch[key], size) for key in batch} 32 | elif isinstance(batch, container_abcs.Sequence): 33 | return [remove_padding(samples, size) for samples in batch] 34 | 35 | raise TypeError((error_msg.format(type(batch)))) 36 | 37 | 38 | def do_evaluation(est_flow, gt_flow, sparse=False): 39 | """ 40 | Do pixel error evaluation. (See KITTI evaluation protocols for details.)
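EPE is the mean Euclidean (end-point) distance between estimated and ground-truth flow vectors over all valid pixels; the k-px metrics report the percentage of valid pixels whose end-point error exceeds k pixels.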
41 | Args: 42 | est_flow, (Tensor): estimated flow map, in [BatchSize, 2, Height, Width] or 43 | [2, Height, Width] layout 44 | gt_flow, (Tensor): ground truth flow map, in [BatchSize, 2, Height, Width] or 45 | [2, Height, Width]layout 46 | sparse, (bool): whether the given flow is sparse, default False 47 | 48 | Returns: 49 | error_dict (dict): the error of 1px, 2px, 3px, 5px, in percent, 50 | range [0,100] and average error epe 51 | """ 52 | error_dict = {} 53 | if est_flow is None: 54 | warnings.warn('Estimated flow map is None') 55 | return error_dict 56 | if gt_flow is None: 57 | warnings.warn('Reference ground truth flow map is None') 58 | return error_dict 59 | 60 | if torch.is_tensor(est_flow): 61 | est_flow = est_flow.clone().cpu() 62 | 63 | if torch.is_tensor(gt_flow): 64 | gt_flow = gt_flow.clone().cpu() 65 | 66 | error_dict = calc_error(est_flow, gt_flow, sparse=sparse) 67 | 68 | return error_dict 69 | 70 | 71 | -------------------------------------------------------------------------------- /dmb/data/datasets/evaluation/flow/pixel_error.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | 5 | def zero_mask(input, eps=1e-12): 6 | mask = abs(input) < eps 7 | return mask 8 | 9 | def calc_error(est_flow=None, gt_flow=None, sparse=False): 10 | """ 11 | Args: 12 | est_flow (Tensor): in [BatchSize, 2, Height, Width] or 13 | [2, Height, Width] layout 14 | gt_flow (Tensor): in [BatchSize, 2, Height, Width] or 15 | [2, Height, Width] layout 16 | sparse, (bool): whether the given flow is sparse, default False 17 | Output: 18 | dict: the error of 1px, 2px, 3px, 5px, in percent, 19 | range [0,100] and average error epe 20 | """ 21 | error1 = torch.Tensor([0.]) 22 | error2 = torch.Tensor([0.]) 23 | error3 = torch.Tensor([0.]) 24 | error5 = torch.Tensor([0.]) 25 | epe = torch.Tensor([0.]) 26 | 27 | if (not torch.is_tensor(est_flow)) or (not torch.is_tensor(gt_flow)): 28 | return { 29 | '1px': error1 * 100, 30 | '2px': error2 * 100, 31 | '3px': error3 * 100, 32 | '5px': error5 * 100, 33 | 'epe': epe 34 | } 35 | 36 | assert torch.is_tensor(est_flow) and torch.is_tensor(gt_flow) 37 | assert est_flow.shape == gt_flow.shape 38 | 39 | est_flow = est_flow.clone().cpu() 40 | gt_flow = gt_flow.clone().cpu() 41 | if len(gt_flow.shape) == 3: 42 | gt_flow = gt_flow.unsqueeze(0) 43 | est_flow = est_flow.unsqueeze(0) 44 | 45 | assert gt_flow.shape[1] == 2, "flow should have horizontal and vertical dimension, " \ 46 | "but got {}".format(gt_flow.shape[1]) 47 | 48 | # [B, 1, H, W] 49 | gt_u, gt_v = gt_flow[:, 0:1, :, :], gt_flow[:, 1:2, :, :] 50 | est_u, est_v = est_flow[:, 0:1, :, :], est_flow[:, 1:2, :, :] 51 | 52 | # get valid mask 53 | # [B, 1, H, W] 54 | mask = torch.ones(gt_u.shape, dtype=torch.bool) 55 | if sparse: 56 | mask = mask & (~(zero_mask(gt_u) & zero_mask(gt_v))) 57 | mask = mask & (~(torch.isnan(gt_u) | torch.isnan(gt_v))) 58 | mask.detach_() 59 | if abs(mask.float().sum()) < 1.0: 60 | return { 61 | '1px': error1 * 100, 62 | '2px': error2 * 100, 63 | '3px': error3 * 100, 64 | '5px': error5 * 100, 65 | 'epe': epe 66 | } 67 | 68 | gt_u = gt_u[mask] 69 | gt_v = gt_v[mask] 70 | est_u = est_u[mask] 71 | est_v = est_v[mask] 72 | 73 | abs_error = torch.sqrt((gt_u - est_u)**2 + (gt_v - est_v)**2) 74 | total_num = mask.float().sum() 75 | 76 | error1 = torch.sum(torch.gt(abs_error, 1).float()) / total_num 77 | error2 = torch.sum(torch.gt(abs_error, 2).float()) / total_num 78 | error3 = torch.sum(torch.gt(abs_error, 
3).float()) / total_num 79 | error5 = torch.sum(torch.gt(abs_error, 5).float()) / total_num 80 | epe = abs_error.float().mean() 81 | 82 | return { 83 | '1px': error1 * 100, 84 | '2px': error2 * 100, 85 | '3px': error3 * 100, 86 | '5px': error5 * 100, 87 | 'epe': epe 88 | } 89 | -------------------------------------------------------------------------------- /dmb/data/datasets/evaluation/stereo/__init__.py: -------------------------------------------------------------------------------- 1 | from .pixel_error import calc_error 2 | from .eval import do_evaluation, do_occlusion_evaluation, remove_padding 3 | from .eval_hooks import DistStereoEvalHook, disp_output_evaluation_in_pandas 4 | -------------------------------------------------------------------------------- /dmb/data/datasets/evaluation/stereo/pixel_error.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | 5 | 6 | def calc_error(est_disp=None, gt_disp=None, lb=None, ub=None): 7 | """ 8 | Args: 9 | est_disp (Tensor): in [BatchSize, Channel, Height, Width] or 10 | [BatchSize, Height, Width] or [Height, Width] layout 11 | gt_disp (Tensor): in [BatchSize, Channel, Height, Width] or 12 | [BatchSize, Height, Width] or [Height, Width] layout 13 | lb (scalar): the lower bound of valid disparity; ground-truth values not greater than lb are masked out 14 | ub (scalar): the upper bound of valid disparity; ground-truth values not less than ub are masked out 15 | Output: 16 | dict: the error of 1px, 2px, 3px, 5px, in percent, 17 | range [0,100] and average error epe 18 | """ 19 | error1 = torch.Tensor([0.]) 20 | error2 = torch.Tensor([0.]) 21 | error3 = torch.Tensor([0.]) 22 | error5 = torch.Tensor([0.]) 23 | epe = torch.Tensor([0.]) 24 | 25 | if (not torch.is_tensor(est_disp)) or (not torch.is_tensor(gt_disp)): 26 | return { 27 | '1px': error1 * 100, 28 | '2px': error2 * 100, 29 | '3px': error3 * 100, 30 | '5px': error5 * 100, 31 | 'epe': epe 32 | } 33 | 34 | assert torch.is_tensor(est_disp) and torch.is_tensor(gt_disp) 35 | assert est_disp.shape == gt_disp.shape 36 | 37 | est_disp = est_disp.clone().cpu() 38 | gt_disp = gt_disp.clone().cpu() 39 | 40 | mask = torch.ones(gt_disp.shape, dtype=torch.bool) 41 | if lb is not None: 42 | mask = mask & (gt_disp > lb) 43 | if ub is not None: 44 | mask = mask & (gt_disp < ub) 45 | mask.detach_() 46 | if abs(mask.float().sum()) < 1.0: 47 | return { 48 | '1px': error1 * 100, 49 | '2px': error2 * 100, 50 | '3px': error3 * 100, 51 | '5px': error5 * 100, 52 | 'epe': epe 53 | } 54 | 55 | gt_disp = gt_disp[mask] 56 | est_disp = est_disp[mask] 57 | 58 | abs_error = torch.abs(gt_disp - est_disp) 59 | total_num = mask.float().sum() 60 | 61 | error1 = torch.sum(torch.gt(abs_error, 1).float()) / total_num 62 | error2 = torch.sum(torch.gt(abs_error, 2).float()) / total_num 63 | error3 = torch.sum(torch.gt(abs_error, 3).float()) / total_num 64 | error5 = torch.sum(torch.gt(abs_error, 5).float()) / total_num 65 | epe = abs_error.float().mean() 66 | 67 | return { 68 | '1px': error1 * 100, 69 | '2px': error2 * 100, 70 | '3px': error3 * 100, 71 | '5px': error5 * 100, 72 | 'epe': epe 73 | } 74 | -------------------------------------------------------------------------------- /dmb/data/datasets/flow/__init__.py: -------------------------------------------------------------------------------- 1 | from .flying_chairs import FlyingChairsDataset 2 | 3 | from .builder import build_flow_dataset -------------------------------------------------------------------------------- /dmb/data/datasets/flow/base.py:
-------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import numpy as np 3 | import yaml 4 | 5 | from torch.utils.data import Dataset 6 | 7 | 8 | class FlowDatasetBase(Dataset): 9 | def __init__(self, annFile, root, transform): 10 | self.annFile = annFile 11 | self.root = root 12 | self.data_list = self.annLoader() 13 | 14 | # transforms for data augmentation 15 | self.transform = transform 16 | 17 | self.flag = np.zeros(len(self.data_list), dtype=np.int64) 18 | 19 | def annLoader(self): 20 | data_list = [] 21 | with open(file=self.annFile, mode='r') as fp: 22 | data_list.extend(yaml.load(fp, Loader=yaml.BaseLoader)) 23 | return data_list 24 | 25 | 26 | def Loader(self, item): 27 | raise NotImplementedError 28 | 29 | def __getitem__(self, idx): 30 | item = self.data_list[idx] 31 | sample = self.Loader(item) 32 | 33 | if self.transform is not None: 34 | sample = self.transform(sample) 35 | 36 | return sample 37 | 38 | def __len__(self): 39 | return len(self.data_list) 40 | 41 | def __repr__(self): 42 | repr_str = '{}\n'.format(self.__class__.__name__) 43 | repr_str += ' ' * 4 + 'Root: {}\n'.format(self.root) 44 | repr_str += ' ' * 4 + 'annFile: {}\n'.format(self.annFile) 45 | repr_str += ' ' * 4 + 'Length: {}\n'.format(self.__len__()) 46 | 47 | return repr_str 48 | 49 | @property 50 | def name(self): 51 | raise NotImplementedError 52 | -------------------------------------------------------------------------------- /dmb/data/datasets/flow/builder.py: -------------------------------------------------------------------------------- 1 | from dmb.data.transforms import Compose 2 | from dmb.data.transforms import flow_trans as T 3 | 4 | from dmb.data.datasets.flow import FlyingChairsDataset 5 | 6 | 7 | def build_transforms(cfg, type, is_train): 8 | input_shape = cfg.data[type].input_shape 9 | mean = cfg.data[type].mean 10 | std = cfg.data[type].std 11 | 12 | if is_train: 13 | transform = Compose( 14 | [ 15 | # T.RandomTranslate(10), 16 | # T.RandomRotate(angle=5, diff_angle=10), 17 | T.ToTensor(), 18 | T.RandomCrop(input_shape), 19 | # T.RandomHorizontalFlip(), 20 | # T.RandomVerticalFlip(), 21 | T.Normalize(mean, std), 22 | # T.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4), 23 | ] 24 | ) 25 | else: 26 | transform = Compose( 27 | [ 28 | T.ToTensor(), 29 | T.CenterCat(input_shape), 30 | T.Normalize(mean, std), 31 | ] 32 | ) 33 | 34 | return transform 35 | 36 | 37 | def build_flow_dataset(cfg, type): 38 | if type not in cfg.data: 39 | return None 40 | 41 | data_root = cfg.data[type].data_root 42 | data_type = cfg.data[type].type 43 | annFile = cfg.data[type].annfile 44 | 45 | is_train = True if type == 'train' else False 46 | transforms = build_transforms(cfg, type, is_train=is_train) 47 | 48 | if 'FlyingChairs' in data_type: 49 | dataset = FlyingChairsDataset(annFile, data_root, transforms) 50 | else: 51 | raise ValueError("invalid data type: {}".format(data_type)) 52 | 53 | return dataset 54 | -------------------------------------------------------------------------------- /dmb/data/datasets/flow/flying_chairs/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import FlyingChairsDataset 2 | 3 | __all__ = ["FlyingChairsDataset"] 4 | -------------------------------------------------------------------------------- /dmb/data/datasets/flow/flying_chairs/base.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 
import numpy as np 3 | from imageio import imread 4 | 5 | from dmb.data.datasets.flow.base import FlowDatasetBase 6 | from dmb.data.datasets.utils import load_flying_chairs_flow 7 | 8 | 9 | class FlyingChairsDataset(FlowDatasetBase): 10 | 11 | def __init__(self, annFile, root, transform=None): 12 | super(FlyingChairsDataset, self).__init__(annFile, root, transform) 13 | 14 | def Loader(self, item): 15 | # only take first three RGB channel no matter in RGB or RGBA format 16 | leftImage = imread( 17 | osp.join(self.root, item['left_image_path']) 18 | ).transpose(2, 0, 1).astype(np.float32)[:3] 19 | rightImage = imread( 20 | osp.join(self.root, item['right_image_path']) 21 | ).transpose(2, 0, 1).astype(np.float32)[:3] 22 | 23 | h, w = leftImage.shape[1], leftImage.shape[2] 24 | original_size = (h, w) 25 | 26 | if 'flow_path' in item.keys() and item['flow_path'] is not None: 27 | flow = load_flying_chairs_flow( 28 | osp.join(self.root, item['flow_path']) 29 | ).transpose(2, 0, 1).astype(np.float32) 30 | 31 | else: 32 | flow = None 33 | 34 | 35 | return { 36 | 'leftImage': leftImage, 37 | 'rightImage': rightImage, 38 | 'flow': flow, 39 | 'original_size': original_size, 40 | } 41 | 42 | @property 43 | def name(self): 44 | return 'FlyingChairs' 45 | -------------------------------------------------------------------------------- /dmb/data/datasets/stereo/__init__.py: -------------------------------------------------------------------------------- 1 | from .scene_flow import SceneFlowDataset 2 | from .kitti import Kitti2012Dataset, Kitti2015Dataset 3 | 4 | from .builder import build_stereo_dataset 5 | 6 | __all__ = [ 7 | "build_stereo_dataset", "SceneFlowDataset", 8 | "Kitti2015Dataset", "Kitti2012Dataset" 9 | ] 10 | -------------------------------------------------------------------------------- /dmb/data/datasets/stereo/base.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os.path as osp 3 | import numpy as np 4 | 5 | from torch.utils.data import Dataset 6 | 7 | 8 | class StereoDatasetBase(Dataset): 9 | def __init__(self, annFile, root, transform=None): 10 | self.annFile = annFile 11 | self.root = root 12 | self.data_list = self.annLoader() 13 | 14 | # transforms for data augmentation 15 | self.transform = transform 16 | 17 | self.flag = np.zeros(len(self.data_list), dtype=np.int64) 18 | 19 | def annLoader(self): 20 | data_list = [] 21 | with open(file=self.annFile, mode='r') as fp: 22 | data_list.extend(json.load(fp)) 23 | return data_list 24 | 25 | def Loader(self, item): 26 | raise NotImplementedError 27 | 28 | def __getitem__(self, idx): 29 | item = self.data_list[idx] 30 | sample = self.Loader(item) 31 | 32 | if self.transform is not None: 33 | sample = self.transform(sample) 34 | 35 | return sample 36 | 37 | def __len__(self): 38 | return len(self.data_list) 39 | 40 | def __repr__(self): 41 | repr_str = '{}\n'.format(self.__class__.__name__) 42 | repr_str += ' ' * 4 + 'Root: {}\n'.format(self.root) 43 | repr_str += ' ' * 4 + 'annFile: {}\n'.format(self.annFile) 44 | repr_str += ' ' * 4 + 'Length: {}\n'.format(self.__len__()) 45 | 46 | return repr_str 47 | 48 | @property 49 | def name(self): 50 | raise NotImplementedError 51 | -------------------------------------------------------------------------------- /dmb/data/datasets/stereo/builder.py: -------------------------------------------------------------------------------- 1 | from dmb.data.transforms import Compose 2 | from dmb.data.transforms import stereo_trans as T 3 | 4 | 
from dmb.data.datasets.stereo.scene_flow import SceneFlowDataset 5 | from dmb.data.datasets.stereo.kitti import Kitti2012Dataset, Kitti2015Dataset 6 | 7 | 8 | def build_transforms(cfg, type, is_train): 9 | input_shape = cfg.data[type].input_shape 10 | mean = cfg.data[type].mean 11 | std = cfg.data[type].std 12 | 13 | if is_train: 14 | transform = Compose( 15 | [ 16 | T.ToTensor(), 17 | T.RandomCrop(input_shape), 18 | T.Normalize(mean, std), 19 | ] 20 | ) 21 | else: 22 | transform = Compose( 23 | [ 24 | T.ToTensor(), 25 | T.StereoPad(input_shape), 26 | T.Normalize(mean, std), 27 | ] 28 | ) 29 | 30 | return transform 31 | 32 | 33 | def build_stereo_dataset(cfg, type): 34 | if type not in cfg.data: 35 | return None 36 | 37 | data_root = cfg.data[type].data_root 38 | data_type = cfg.data[type].type 39 | annFile = cfg.data[type].annfile 40 | 41 | is_train = True if type == 'train' else False 42 | transforms = build_transforms(cfg, type, is_train=is_train) 43 | 44 | if 'SceneFlow' in data_type: 45 | dataset = SceneFlowDataset(annFile, data_root, transforms) 46 | elif 'KITTI' in data_type: 47 | if '2012' in data_type: 48 | dataset = Kitti2012Dataset(annFile, data_root, transforms) 49 | elif '2015' in data_type: 50 | dataset = Kitti2015Dataset(annFile, data_root, transforms) 51 | else: 52 | raise ValueError("invalid data type: {}".format(data_type)) 53 | else: 54 | raise ValueError("invalid data type: {}".format(data_type)) 55 | 56 | return dataset 57 | -------------------------------------------------------------------------------- /dmb/data/datasets/stereo/kitti/__init__.py: -------------------------------------------------------------------------------- 1 | from .kitti_2012 import Kitti2012Dataset 2 | from .kitti_2015 import Kitti2015Dataset 3 | 4 | __all__ = ["Kitti2012Dataset", "Kitti2015Dataset"] 5 | -------------------------------------------------------------------------------- /dmb/data/datasets/stereo/kitti/base.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | import numpy as np 4 | from imageio import imread 5 | 6 | from dmb.data.datasets.stereo.base import StereoDatasetBase 7 | 8 | 9 | class KittiDatasetBase(StereoDatasetBase): 10 | 11 | def __init__(self, annFile, root, transform=None): 12 | super(KittiDatasetBase, self).__init__(annFile, root, transform) 13 | 14 | def Loader(self, item): 15 | # only take first three RGB channel no matter in RGB or RGBA format 16 | leftImage = imread( 17 | osp.join(self.root, item['left_image_path']) 18 | ).transpose(2, 0, 1).astype(np.float32)[:3] 19 | rightImage = imread( 20 | osp.join(self.root, item['right_image_path']) 21 | ).transpose(2, 0, 1).astype(np.float32)[:3] 22 | 23 | h, w = leftImage.shape[1], leftImage.shape[2] 24 | original_size = (h, w) 25 | 26 | sample = { 27 | 'leftImage': leftImage, 28 | 'rightImage': rightImage, 29 | 'original_size': original_size, 30 | } 31 | 32 | 33 | if 'left_disp_map_path' in item.keys() and item['left_disp_map_path'] is not None: 34 | leftDisp = imread( 35 | osp.join(self.root, item['left_disp_map_path']) 36 | ).astype(np.float32) / 256.0 37 | leftDisp = leftDisp[np.newaxis, ...] 38 | 39 | sample.update(leftDisp=leftDisp) 40 | 41 | if 'right_disp_map_path' in item.keys() and item['right_disp_map_path'] is not None: 42 | rightDisp = imread( 43 | osp.join(self.root, item['right_disp_map_path']) 44 | ).astype(np.float32) / 256.0 45 | rightDisp = rightDisp[np.newaxis, ...] 
46 | 47 | sample.update(rightDisp=rightDisp) 48 | 49 | return sample 50 | 51 | @property 52 | def name(self): 53 | return 'KITTI' 54 | -------------------------------------------------------------------------------- /dmb/data/datasets/stereo/kitti/kitti_2012.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import numpy as np 3 | from imageio import imread 4 | 5 | from dmb.data.datasets.stereo.kitti.base import KittiDatasetBase 6 | 7 | 8 | class Kitti2012Dataset(KittiDatasetBase): 9 | 10 | def __init__(self, annFile, root, transform=None): 11 | super(Kitti2012Dataset, self).__init__(annFile, root, transform) 12 | 13 | @property 14 | def name(self): 15 | return 'KITTI-2012' 16 | -------------------------------------------------------------------------------- /dmb/data/datasets/stereo/kitti/kitti_2015.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import numpy as np 3 | from imageio import imread 4 | 5 | from dmb.data.datasets.stereo.kitti.base import KittiDatasetBase 6 | 7 | 8 | class Kitti2015Dataset(KittiDatasetBase): 9 | 10 | def __init__(self, annFile, root, transform=None): 11 | super(Kitti2015Dataset, self).__init__(annFile, root, transform) 12 | 13 | @property 14 | def name(self): 15 | return 'KITTI-2015' 16 | -------------------------------------------------------------------------------- /dmb/data/datasets/stereo/scene_flow/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import SceneFlowDataset 2 | 3 | __all__ = ["SceneFlowDataset"] 4 | -------------------------------------------------------------------------------- /dmb/data/datasets/stereo/scene_flow/base.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import numpy as np 3 | from imageio import imread 4 | 5 | from dmb.data.datasets.stereo.base import StereoDatasetBase 6 | from dmb.data.datasets.utils import load_scene_flow_disp 7 | 8 | 9 | class SceneFlowDataset(StereoDatasetBase): 10 | 11 | def __init__(self, annFile, root, transform=None): 12 | super(SceneFlowDataset, self).__init__(annFile, root, transform) 13 | 14 | def Loader(self, item): 15 | # only take first three RGB channel no matter in RGB or RGBA format 16 | leftImage = imread( 17 | osp.join(self.root, item['left_image_path']) 18 | ).transpose(2, 0, 1).astype(np.float32)[:3] 19 | rightImage = imread( 20 | osp.join(self.root, item['right_image_path']) 21 | ).transpose(2, 0, 1).astype(np.float32)[:3] 22 | 23 | h, w = leftImage.shape[1], leftImage.shape[2] 24 | original_size = (h, w) 25 | 26 | if 'left_disp_map_path' in item.keys() and item['left_disp_map_path'] is not None: 27 | leftDisp = load_scene_flow_disp( 28 | osp.join(self.root, item['left_disp_map_path']) 29 | ) 30 | leftDisp = leftDisp[np.newaxis, ...] 31 | 32 | else: 33 | leftDisp = None 34 | 35 | if 'right_disp_map_path' in item.keys() and item['right_disp_map_path'] is not None: 36 | rightDisp = load_scene_flow_disp( 37 | osp.join(self.root, item['right_disp_map_path']) 38 | ) 39 | rightDisp = rightDisp[np.newaxis, ...] 
40 | 41 | else: 42 | rightDisp = None 43 | 44 | return { 45 | 'leftImage': leftImage, 46 | 'rightImage': rightImage, 47 | 'leftDisp': leftDisp, 48 | 'rightDisp': rightDisp, 49 | 'original_size': original_size, 50 | } 51 | 52 | @property 53 | def name(self): 54 | return 'SceneFlow' 55 | -------------------------------------------------------------------------------- /dmb/data/datasets/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .load_disp import load_scene_flow_disp 2 | from .load_flow import load_flying_chairs_flow, load_flying_things_flow, load_kitti_flow, write_flying_chairs_flow 3 | -------------------------------------------------------------------------------- /dmb/data/datasets/utils/load_disp.py: -------------------------------------------------------------------------------- 1 | import re 2 | import numpy as np 3 | 4 | 5 | def load_pfm(file_path): 6 | """ 7 | Load an image stored in PFM format. 8 | Args: 9 | file_path (string): absolute file path 10 | Returns: 11 | data (numpy.array): data of image in (Height, Width[, 3]) layout 12 | scale (float): scale of image 13 | """ 14 | with open(file_path, encoding="ISO-8859-1") as fp: 15 | color = None 16 | width = None 17 | height = None 18 | scale = None 19 | endian = None 20 | 21 | # load file header and grab channels: 'PF' means 3 channels, 'Pf' means 1 channel (gray scale) 22 | header = fp.readline().rstrip() 23 | if header == 'PF': 24 | color = True 25 | elif header == 'Pf': 26 | color = False 27 | else: 28 | raise Exception('Not a PFM file.') 29 | 30 | # grab image dimensions 31 | dim_match = re.match(r'^(\d+)\s(\d+)\s$', fp.readline()) 32 | if dim_match: 33 | width, height = map(int, dim_match.groups()) 34 | else: 35 | raise Exception('Malformed PFM header.') 36 | 37 | # grab image scale 38 | scale = float(fp.readline().rstrip()) 39 | if scale < 0: # little-endian 40 | endian = '<' 41 | scale = -scale 42 | else: 43 | endian = '>' # big-endian 44 | 45 | # grab image data 46 | data = np.fromfile(fp, endian + 'f') 47 | shape = (height, width, 3) if color else (height, width) 48 | 49 | # reshape data to [Height, Width, Channels] 50 | data = np.reshape(data, shape) 51 | data = np.flipud(data) 52 | 53 | return data, scale 54 | 55 | 56 | # load utils 57 | def load_scene_flow_disp(img_path): 58 | """Load a Scene Flow disparity image. 59 | Args: 60 | img_path (string): path to the .pfm disparity file 61 | Returns: 62 | disp_img (numpy.array): disparity map in (Height, Width) layout 63 | """ 64 | assert img_path.endswith('.pfm'), "scene flow disparity image must end with .pfm, " \ 65 | "but got {}".format(img_path) 66 | 67 | disp_img, __ = load_pfm(img_path) 68 | 69 | return disp_img 70 | -------------------------------------------------------------------------------- /dmb/data/loaders/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_data_loader 2 | -------------------------------------------------------------------------------- /dmb/data/loaders/builder.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | from torch.utils.data import DataLoader 4 | 5 | from mmcv.parallel import collate 6 | from mmcv.runner import get_dist_info 7 | 8 | from .samplers import GroupSampler, DistributedGroupSampler, DistributedSampler 9 | 10 | # https://github.com/pytorch/pytorch/issues/973 11 | import resource 12 | 13 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 14 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 15 | 16 | 17 | def build_data_loader(
18 | dataset, 19 | imgs_per_gpu, 20 | workers_per_gpu, 21 | num_gpus=1, 22 | dist=True, 23 | **kwargs 24 | ): 25 | # pop, not get: DataLoader must not receive both a sampler and shuffle=True 26 | shuffle = kwargs.pop('shuffle', True) 27 | if dist: 28 | rank, world_size = get_dist_info() 29 | if shuffle: 30 | sampler = DistributedGroupSampler( 31 | dataset, imgs_per_gpu, world_size, rank 32 | ) 33 | else: 34 | sampler = DistributedSampler( 35 | dataset, world_size, rank, shuffle=False 36 | ) 37 | batch_size = imgs_per_gpu 38 | num_workers = workers_per_gpu 39 | else: 40 | sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None 41 | batch_size = num_gpus * imgs_per_gpu 42 | num_workers = num_gpus * workers_per_gpu 43 | 44 | data_loader = DataLoader( 45 | dataset, 46 | batch_size=batch_size, 47 | sampler=sampler, 48 | num_workers=num_workers, 49 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu), 50 | pin_memory=False, 51 | **kwargs) 52 | 53 | return data_loader 54 | -------------------------------------------------------------------------------- /dmb/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .transforms import Compose 2 | -------------------------------------------------------------------------------- /dmb/data/transforms/builder.py: -------------------------------------------------------------------------------- 1 | from . import transforms as T 2 | 3 | 4 | def build_transforms(cfg, is_train=True): 5 | return None 6 | -------------------------------------------------------------------------------- /dmb/data/transforms/stereo_trans.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numbers 3 | import numpy as np 4 | 5 | import torch 6 | from torch.nn.functional import pad 7 | import torchvision.transforms.functional as F 8 | 9 | 10 | class ToTensor(object): 11 | """ 12 | Convert numpy.ndarray to torch.FloatTensor, in [Channels, Height, Width] layout 13 | """ 14 | def __call__(self, sample): 15 | for k in sample.keys(): 16 | if sample[k] is not None and isinstance(sample[k], np.ndarray): 17 | sample[k] = torch.from_numpy(sample[k].copy()) 18 | return sample 19 | 20 | 21 | class CenterCrop(object): 22 | """Crops the given image at the central location to have a region of 23 | the given size. size can be a tuple (target_height, target_width) 24 | or an integer, in which case the target will be of a square shape (size, size) 25 | """ 26 | 27 | def __init__(self, size): 28 | if isinstance(size, numbers.Number): 29 | self.size = (int(size), int(size)) 30 | else: 31 | self.size = size 32 | 33 | def __call__(self, sample): 34 | 35 | h, w = sample['leftImage'].shape[-2:] 36 | th, tw = self.size 37 | if w == tw and h == th: 38 | return sample 39 | 40 | x1 = (w - tw) // 2 41 | y1 = (h - th) // 2 42 | 43 | for k in sample.keys(): 44 | if sample[k] is not None and isinstance(sample[k], (np.ndarray, torch.Tensor)): 45 | sample[k] = sample[k][:, y1: y1 + th, x1: x1 + tw] 46 | return sample 47 | 48 | 49 | class RandomCrop(object): 50 | """Crops the given image at a random location to have a region of 51 | the given size.
size can be a tuple (target_height, target_width) 52 | or an integer, in which case the target will be of a square shape (size, size) 53 | """ 54 | 55 | def __init__(self, size): 56 | if isinstance(size, numbers.Number): 57 | self.size = (int(size), int(size)) 58 | else: 59 | self.size = size 60 | 61 | def __call__(self, sample): 62 | 63 | h, w = sample['leftImage'].shape[-2:] 64 | th, tw = self.size 65 | if w == tw and h == th: 66 | return sample 67 | 68 | x1 = random.randint(0, w - tw) 69 | y1 = random.randint(0, h - th) 70 | 71 | for k in sample.keys(): 72 | if sample[k] is not None and isinstance(sample[k], (np.ndarray, torch.Tensor)): 73 | sample[k] = sample[k][:, y1: y1 + th, x1: x1 + tw] 74 | return sample 75 | 76 | 77 | class Normalize(object): """Normalize 'leftImage' and 'rightImage' channel-wise with the given mean and std.""" 78 | def __init__(self, mean, std): 79 | self.mean = mean 80 | self.std = std 81 | 82 | def __call__(self, sample): 83 | sample['leftImage'] = F.normalize( 84 | sample['leftImage'], mean=self.mean, std=self.std 85 | ) 86 | sample['rightImage'] = F.normalize( 87 | sample['rightImage'], mean=self.mean, std=self.std 88 | ) 89 | return sample 90 | 91 | 92 | class StereoPad(object): """Zero-pad 'leftImage' and 'rightImage' on the top and right up to the given size.""" 93 | def __init__(self, size): 94 | if isinstance(size, numbers.Number): 95 | self.size = (int(size), int(size)) 96 | else: 97 | self.size = size 98 | 99 | def __call__(self, sample): 100 | h, w = sample['leftImage'].shape[-2:] 101 | th, tw = self.size 102 | if w == tw and h == th: 103 | return sample 104 | 105 | pad_left = 0 106 | pad_right = tw - w 107 | pad_top = th - h 108 | pad_bottom = 0 109 | 110 | sample['leftImage'] = pad( 111 | sample['leftImage'], [pad_left, pad_right, pad_top, pad_bottom], 112 | mode='constant', value=0 113 | ) 114 | sample['rightImage'] = pad( 115 | sample['rightImage'], [pad_left, pad_right, pad_top, pad_bottom], 116 | mode='constant', value=0 117 | ) 118 | 119 | return sample 120 | -------------------------------------------------------------------------------- /dmb/data/transforms/transforms.py: -------------------------------------------------------------------------------- 1 | class Compose(object): 2 | def __init__(self, transforms): 3 | self.transforms = transforms 4 | 5 | def __call__(self, sample): 6 | for t in self.transforms: 7 | sample = t(sample) 8 | return sample 9 | 10 | def __repr__(self): 11 | format_string = self.__class__.__name__ + "(" 12 | for t in self.transforms: 13 | format_string += "\n" 14 | format_string += " {0}".format(t) 15 | format_string += "\n)" 16 | return format_string 17 | -------------------------------------------------------------------------------- /dmb/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .flow.models import _META_ARCHITECTURES as _FLOW_META_ARCHITECTURES 2 | from .stereo.models import _META_ARCHITECTURES as _STEREO_META_ARCHITECTURES 3 | 4 | _META_ARCHITECTURES = dict() 5 | 6 | _META_ARCHITECTURES.update(_FLOW_META_ARCHITECTURES) 7 | _META_ARCHITECTURES.update(_STEREO_META_ARCHITECTURES) 8 | 9 | 10 | def build_model(cfg): 11 | meta_arch = _META_ARCHITECTURES[cfg.model.meta_architecture] 12 | return meta_arch(cfg) 13 | -------------------------------------------------------------------------------- /dmb/modeling/flow/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import build_flow_model --------------------------------------------------------------------------------
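# NOTE (editor): a self-contained sketch of chaining the stereo transforms
# defined in dmb/data/transforms above via Compose; the shapes and the
# normalization statistics are illustrative assumptions, not values shipped
# with this repository.

import numpy as np
from dmb.data.transforms import Compose
from dmb.data.transforms.stereo_trans import ToTensor, RandomCrop, Normalize

pipeline = Compose([
    ToTensor(),                 # numpy.ndarray -> torch.Tensor, [C, H, W]
    RandomCrop((240, 480)),     # crops every tensor in the sample alike
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

sample = {
    'leftImage': np.random.rand(3, 256, 512).astype(np.float32),
    'rightImage': np.random.rand(3, 256, 512).astype(np.float32),
    'leftDisp': np.random.rand(1, 256, 512).astype(np.float32),
    'rightDisp': None,          # None entries pass through untouched
}
out = pipeline(sample)          # out['leftImage'].shape == (3, 240, 480)

/dmb/modeling/flow/models/__init__.py: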
-------------------------------------------------------------------------------- 1 | 2 | _META_ARCHITECTURES = { 3 | 4 | } 5 | 6 | 7 | def build_flow_model(cfg): 8 | meta_arch = _META_ARCHITECTURES[cfg.model.meta_architecture] 9 | return meta_arch(cfg) -------------------------------------------------------------------------------- /dmb/modeling/stereo/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import build_stereo_model 2 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/backbones/GCNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv_bn_relu, BasicBlock 6 | 7 | 8 | class GCNetBackbone(nn.Module): 9 | """ 10 | Backbone proposed in GCNet. 11 | Args: 12 | in_planes (int): the channels of input 13 | batch_norm (bool): whether use batch normalization layer, default True 14 | Inputs: 15 | l_img (Tensor): left image, in [BatchSize, 3, Height, Width] 16 | r_img (Tensor): right image, in [BatchSize, 3, Height, Width] 17 | Outputs: 18 | l_fms (Tensor): left image feature maps, in [BatchSize, 32, Height//2, Width//2] 19 | right (Tensor): right image feature maps, in [BatchSize, 32, Height//2, Width//2] 20 | """ 21 | 22 | def __init__(self, in_planes, batch_norm=True): 23 | super(GCNetBackbone, self).__init__() 24 | self.in_planes = in_planes 25 | 26 | self.backbone = nn.Sequential( 27 | conv_bn_relu(batch_norm, self.in_planes, 32, 5, 2, 2), 28 | BasicBlock(batch_norm, 32, 32, 1, None, 1, 1), 29 | BasicBlock(batch_norm, 32, 32, 1, None, 1, 1), 30 | BasicBlock(batch_norm, 32, 32, 1, None, 1, 1), 31 | BasicBlock(batch_norm, 32, 32, 1, None, 1, 1), 32 | BasicBlock(batch_norm, 32, 32, 1, None, 1, 1), 33 | BasicBlock(batch_norm, 32, 32, 1, None, 1, 1), 34 | BasicBlock(batch_norm, 32, 32, 1, None, 1, 1), 35 | BasicBlock(batch_norm, 32, 32, 1, None, 1, 1), 36 | nn.Conv2d(32, 32, 3, 1, 1) 37 | ) 38 | 39 | def forward(self, *input): 40 | if len(input) != 2: 41 | raise ValueError('expected input length 2 (got {} length input)'.format(len(input))) 42 | l_img, r_img = input 43 | 44 | l_fms = self.backbone(l_img) 45 | r_fms = self.backbone(r_img) 46 | 47 | return l_fms, r_fms 48 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/backbones/StereoNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv_bn, conv_bn_relu, BasicBlock 6 | 7 | class DownsampleHead(nn.Module): 8 | """ 9 | Args: 10 | in_planes (int): the channels of input 11 | out_planes (int): the channels of output 12 | batchNorm, (bool): whether use batch normalization layer, default True 13 | Inputs: 14 | x, (tensor): feature in (BatchSize, in_planes, Height, Width) layout 15 | Outputs: 16 | down_x, (tensor): downsampled feature in (BatchSize, out_planes, Height, Width) layout 17 | """ 18 | 19 | def __init__(self, in_planes, out_planes, batch_norm=True): 20 | super(DownsampleHead, self).__init__() 21 | 22 | self.in_planes = in_planes 23 | self.out_planes = out_planes 24 | self.batch_norm = batch_norm 25 | 26 | self.downsample = nn.Conv2d(in_planes, out_planes, kernel_size=5, 27 | stride=2, padding=2, bias=True) 28 | 29 | def 
forward(self, x): 30 | down_x = self.downsample(x) 31 | return down_x 32 | 33 | 34 | class StereoNetBackbone(nn.Module): 35 | """ 36 | Backbone proposed in StereoNet. 37 | Args: 38 | in_planes (int): the channels of input 39 | batch_norm (bool): whether use batch normalization layer, default True 40 | downsample_num (int): the number of downsample module, 41 | the input RGB image will be downsample to 1/2^num resolution, default 3, i.e., 1/8 resolution 42 | residual_num (int): the number of residual blocks, used for robust feature extraction 43 | Inputs: 44 | l_img (Tensor): left image, in [BatchSize, 3, Height, Width] layout 45 | r_img (Tensor): right image, in [BatchSize, 3, Height, Width] layout 46 | Outputs: 47 | l_fms (Tensor): left image feature maps, in [BatchSize, 32, Height//8, Width//8] layout 48 | r_fms (Tensor): right image feature maps, in [BatchSize, 32, Height//8, Width//8] layout 49 | """ 50 | 51 | def __init__(self, in_planes=3, batch_norm=True, downsample_num=3, residual_num=6): 52 | super(StereoNetBackbone, self).__init__() 53 | self.in_planes = in_planes 54 | self.batch_norm = batch_norm 55 | self.downsample_num = downsample_num 56 | self.residual_num = residual_num 57 | 58 | # Continuously downsample the input RGB image to 1/2^num resolution 59 | in_planes = self.in_planes 60 | out_planes = 32 61 | 62 | self.downsample = nn.ModuleList() 63 | for _ in range(self.downsample_num): 64 | self.downsample.append(DownsampleHead(in_planes, out_planes)) 65 | in_planes = out_planes 66 | out_planes = 32 67 | 68 | # Build residual feature extraction module 69 | self.residual_blocks = nn.ModuleList() 70 | for _ in range(self.residual_num): 71 | self.residual_blocks.append(BasicBlock( 72 | self.batch_norm, 32, 32, stride=1, downsample=None, padding=1, dilation=1 73 | )) 74 | 75 | self.lastconv = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1, bias=True) 76 | 77 | 78 | def _forward(self, x): 79 | 80 | for i in range(self.downsample_num): 81 | x = self.downsample[i](x) 82 | 83 | for i in range(self.residual_num): 84 | x = self.residual_blocks[i](x) 85 | 86 | output_feature = self.lastconv(x) 87 | 88 | return output_feature 89 | 90 | def forward(self, *input): 91 | if len(input) != 2: 92 | raise ValueError('expected input length 2 (got {} length input)'.format(len(input))) 93 | 94 | l_img, r_img = input 95 | 96 | l_fms = self._forward(l_img) 97 | r_fms = self._forward(r_img) 98 | 99 | return l_fms, r_fms 100 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import build_backbone 2 | 3 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/backbones/backbones.py: -------------------------------------------------------------------------------- 1 | from .GCNet import GCNetBackbone 2 | from .PSMNet import PSMNetBackbone 3 | from .StereoNet import StereoNetBackbone 4 | from .DeepPruner import DeepPrunerBestBackbone, DeepPrunerFastBackbone 5 | from .AnyNet import AnyNetBackbone 6 | 7 | BACKBONES = { 8 | 'GCNet': GCNetBackbone, 9 | 'PSMNet': PSMNetBackbone, 10 | 'StereoNet': StereoNetBackbone, 11 | 'BestDeepPruner': DeepPrunerBestBackbone, 12 | 'FastDeepPruner': DeepPrunerFastBackbone, 13 | 'AnyNet': AnyNetBackbone, 14 | } 15 | 16 | def build_backbone(cfg): 17 | backbone_type = cfg.model.backbone.type 18 | 19 | assert backbone_type in BACKBONES, \ 20 | "model 
backbone type not found, expected: {}," \ 21 | "but got {}".format(BACKBONES.keys(), backbone_type) 22 | 23 | default_args = cfg.model.backbone.copy() 24 | default_args.pop('type') 25 | default_args.update(batch_norm=cfg.model.batch_norm) 26 | 27 | backbone = BACKBONES[backbone_type](**default_args) 28 | 29 | return backbone 30 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/backbones/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/dmb/modeling/stereo/backbones/utils/__init__.py -------------------------------------------------------------------------------- /dmb/modeling/stereo/cmn/__init__.py: -------------------------------------------------------------------------------- 1 | from .cmn import build_cmn 2 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cmn/cmn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv_bn_relu 6 | 7 | from .loss import make_cmn_loss_evaluator 8 | 9 | 10 | class ConfHead(nn.Module): 11 | """ 12 | Args: 13 | in_planes (int): the channels of the cost volume used to calculate the confidence map 14 | batch_norm, (bool): whether use batch normalization layer, default True 15 | Inputs: 16 | cost, (tensor): cost volume in (BatchSize, in_planes, Height, Width) layout 17 | Outputs: 18 | confCost, (tensor): in (BatchSize, 1, Height, Width) layout 19 | """ 20 | 21 | def __init__(self, in_planes, batch_norm=True): 22 | super(ConfHead, self).__init__() 23 | 24 | self.in_planes = in_planes 25 | self.sec_in_planes = int(self.in_planes // 3) 26 | self.sec_in_planes = self.sec_in_planes if self.sec_in_planes > 0 else 1 27 | 28 | self.conf_net = nn.Sequential( 29 | conv_bn_relu(batch_norm, self.in_planes, self.sec_in_planes, 3, 1, 1, bias=False), 30 | nn.Conv2d(self.sec_in_planes, 1, 1, 1, 0, bias=False) 31 | ) 32 | 33 | def forward(self, cost): 34 | conf = self.conf_net(cost) 35 | return conf 36 | 37 | 38 | # confidence measure network 39 | class Cmn(nn.Module): 40 | 41 | def __init__(self, cfg, in_planes, num, alpha, beta): 42 | super(Cmn, self).__init__() 43 | self.cfg = cfg.copy() 44 | 45 | batch_norm = self.cfg.model.batch_norm 46 | conf_heads = nn.ModuleList( 47 | [ConfHead(in_planes, batch_norm) for _ in range(num)] 48 | ) 49 | loss_evaluator = make_cmn_loss_evaluator(cfg) 50 | 51 | self.alpha = alpha 52 | self.beta = beta 53 | 54 | self.conf_heads = conf_heads 55 | self.loss_evaluator = loss_evaluator 56 | 57 | def get_confidence(self, costs): 58 | assert len(self.conf_heads) == len(costs), "NUM of confidence heads({}) must be equal to NUM " \ 59 | "of cost volumes({})".format(len(self.conf_heads), len(costs)) 60 | 61 | # for convenience of using log-sigmoid when calculating the loss, 62 | # we don't directly convert the confidence cost to confidence by sigmoid 63 | conf_costs = [conf_head(cost) for cost, conf_head in zip(costs, self.conf_heads)] 64 | # convert to confidence 65 | confs = [torch.sigmoid(conf_cost) for conf_cost in conf_costs] 66 | # calculate variance modulated by confidence 67 | cost_vars = [self.alpha * (1 - conf) + self.beta for conf in confs] 68 | 69 | return confs, cost_vars, conf_costs 70 |
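    # NOTE (editor): a worked example of the variance modulation above, with
    # illustrative numbers (alpha and beta come from the config; alpha=1.0,
    # beta=0.1 here are assumptions): a confident pixel with conf=0.9 gets
    # variance 1.0 * (1 - 0.9) + 0.1 = 0.2, while an unconfident pixel with
    # conf=0.1 gets 1.0 * (1 - 0.1) + 0.1 = 1.0, i.e. low confidence widens
    # the modulated cost distribution.
71 | def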
get_loss(self, confs, target=None): 72 | cm_losses = self.loss_evaluator(confs, target) 73 | 74 | return cm_losses 75 | 76 | def forward(self, costs, target=None): 77 | confs, cost_vars, conf_costs = self.get_confidence(costs) 78 | 79 | if self.training: 80 | cm_losses = self.get_loss(conf_costs, target) 81 | return cost_vars, cm_losses 82 | else: 83 | return cost_vars, confs 84 | 85 | 86 | def build_cmn(cfg): 87 | in_planes = cfg.model.cmn.in_planes 88 | num = cfg.model.cmn.num 89 | alpha = cfg.model.cmn.alpha 90 | beta = cfg.model.cmn.beta 91 | 92 | return Cmn(cfg, in_planes, num, alpha, beta) 93 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cmn/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | 4 | from dmb.modeling.stereo.losses.conf_nll_loss import ConfidenceNllLoss 5 | 6 | 7 | def make_conf_nll_loss_evaluator(cfg): 8 | default_args = cfg.model.cmn.losses.nll_loss.copy() 9 | default_args.update(sparse=cfg.data.sparse) 10 | default_args.pop('weight') 11 | 12 | return ConfidenceNllLoss(**default_args) 13 | 14 | 15 | class CMNLossEvaluator(object): 16 | def __init__(self, cfg, loss_evaluators, loss_weights): 17 | self.cfg = cfg.copy() 18 | self.loss_evaluators = loss_evaluators 19 | self.loss_weights = loss_weights 20 | 21 | def __call__(self, confs, target): 22 | loss_dict = dict() 23 | 24 | for loss_name, loss_evaluator in self.loss_evaluators.items(): 25 | weight = self.loss_weights[loss_name] 26 | if isinstance(loss_evaluator, ConfidenceNllLoss): 27 | conf_nll_loss_dict = loss_evaluator(confs, target) 28 | conf_nll_loss_dict = {k: v * weight for k, v in conf_nll_loss_dict.items()} 29 | loss_dict.update(conf_nll_loss_dict) 30 | else: 31 | raise ValueError("{} not implemented.".format(loss_name)) 32 | 33 | return loss_dict 34 | 35 | 36 | def make_cmn_loss_evaluator(cfg): 37 | loss_evaluators = dict() 38 | loss_weights = dict() 39 | 40 | if "nll_loss" in cfg.model.cmn.losses: 41 | conf_nll_loss_evaluator = make_conf_nll_loss_evaluator(cfg) 42 | loss_evaluators["conf_nll_loss"] = conf_nll_loss_evaluator 43 | loss_weights["conf_nll_loss"] = cfg.model.cmn.losses.nll_loss.weight 44 | 45 | return CMNLossEvaluator( 46 | cfg, loss_evaluators, loss_weights 47 | ) 48 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/conf_measure/__init__.py: -------------------------------------------------------------------------------- 1 | from .conf_net import ConfidenceEstimation 2 | from .calc_conf import pkrConf, apkrConf, nlmConf 3 | from .gen_conf import ConfGenerator 4 | 5 | __all__ = [ 6 | 'ConfidenceEstimation', 7 | 'pkrConf', 'apkrConf', 'nlmConf', 8 | 'ConfGenerator', 9 | ] 10 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/conf_measure/conf_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv_bn_relu 6 | 7 | 8 | class ConfidenceEstimation(nn.Module): 9 | """ 10 | Args: 11 | in_planes, (int): usually cost volume used to calculate confidence map with $in_planes$ in Channel Dimension 12 | batchNorm, (bool): whether use batch normalization layer, default True 13 | Inputs: 14 | cost, (tensor): cost volume in (BatchSize, in_planes, Height, Width) layout 15 | 
Outputs: 16 | confCost, (tensor): in (BatchSize, 1, Height, Width) layout 17 | """ 18 | 19 | def __init__(self, in_planes, batchNorm=True): 20 | super(ConfidenceEstimation, self).__init__() 21 | 22 | self.in_planes = in_planes 23 | self.sec_in_planes = int(self.in_planes // 3) 24 | self.sec_in_planes = self.sec_in_planes if self.sec_in_planes > 0 else 1 25 | 26 | self.conf_net = nn.Sequential( 27 | conv_bn_relu(batchNorm, self.in_planes, self.sec_in_planes, 3, 1, 1, bias=False), 28 | nn.Conv2d(self.sec_in_planes, 1, 1, 1, 0, bias=False) 29 | ) 30 | 31 | def forward(self, cost): 32 | assert cost.shape[1] == self.in_planes 33 | 34 | confCost = self.conf_net(cost) 35 | 36 | return confCost 37 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/conf_measure/gen_conf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class ConfGenerator(nn.Module): 6 | """ 7 | Implementation of Confidence ground-truth label generation 8 | Args: 9 | gtDisp: tensor, in (Height, Width) or (BatchSize, Height, Width) or (BatchSize, 1, Height, Width) layout 10 | estDisp: tensor, in (Height, Width) or (BatchSize, Height, Width) or (BatchSize, 1, Height, Width) layout 11 | theta: a threshold parameter to compare the ground-truth disparity map and the estimated disparity map 12 | Outputs: 13 | confidence_gt_label, in (BatchSize, 1, Height, Width) layout 14 | """ 15 | 16 | def __init__(self, theta): 17 | super(ConfGenerator, self).__init__() 18 | 19 | if not isinstance(theta, (int, float)): 20 | raise TypeError('(int,float) is expected, got {}'.format(type(theta))) 21 | 22 | self.theta = theta 23 | 24 | def forward(self, estDisp, gtDisp): 25 | 26 | if not torch.is_tensor(gtDisp): 27 | raise TypeError('ground truth disparity map is expected to be tensor, got {}'.format(type(gtDisp))) 28 | if not torch.is_tensor(estDisp): 29 | raise TypeError('estimated disparity map is expected to be tensor, got {}'.format(type(estDisp))) 30 | 31 | assert estDisp.shape == gtDisp.shape 32 | 33 | if gtDisp.dim() == 2: # single image H x W 34 | h, w = gtDisp.size(0), gtDisp.size(1) 35 | gtDisp = gtDisp.view(1, 1, h, w) 36 | estDisp = estDisp.view(1, 1, h, w) 37 | 38 | if gtDisp.dim() == 3: # multi image B x H x W 39 | b, h, w = gtDisp.size(0), gtDisp.size(1), gtDisp.size(2) 40 | gtDisp = gtDisp.view(b, 1, h, w) 41 | estDisp = estDisp.view(b, 1, h, w) 42 | 43 | if gtDisp.dim() == 4: 44 | if gtDisp.size(1) == 1: # mult image B x 1 x H x W 45 | self.gtDisp = gtDisp 46 | self.estDisp = estDisp 47 | else: 48 | raise ValueError('2nd dimension size should be 1, got {}'.format(gtDisp.size(1))) 49 | 50 | confidence_gt_label = torch.lt(torch.abs(self.estDisp - self.gtDisp), self.theta).type_as(self.gtDisp) 51 | 52 | return confidence_gt_label 53 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/AnyNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .utils.dif_fms import fast_dif_fms 6 | from .aggregators.AnyNet import AnyNetAggregator 7 | 8 | class AnyNetProcessor(nn.Module): 9 | """ 10 | An implementation of cost procession in AnyNet 11 | 12 | Inputs: 13 | stage, (str): 'init_guess', the coarsest disparity estimation, 14 | 'warp_level_8', refine the disparity estimation with feature warp at resolution=1/8 15 | 
'warp_level_4', refine the disparity estimation with feature warp at resolution=1/4 16 | left, (tensor): Left image feature, in [BatchSize, Channels, Height, Width] layout 17 | right, (tensor): Right image feature, in [BatchSize, Channels, Height, Width] layout 18 | disp, (tensor): Disparity map output by the last stage, in [BatchSize, 1, Height, Width] layout 19 | 20 | Outputs: 21 | cost_volume (tuple of Tensor): cost volume 22 | in [BatchSize, MaxDisparity, Height, Width] layout 23 | 24 | """ 25 | 26 | def __init__(self, cfg): 27 | super(AnyNetProcessor, self).__init__() 28 | self.cfg = cfg.copy() 29 | self.batch_norm = cfg.model.batch_norm 30 | 31 | self.stage = self.cfg.model.stage 32 | 33 | # cost computation parameters, dict 34 | self.max_disp = self.cfg.model.cost_processor.cost_computation.max_disp 35 | self.start_disp = self.cfg.model.cost_processor.cost_computation.start_disp 36 | self.dilation = self.cfg.model.cost_processor.cost_computation.dilation 37 | 38 | 39 | # cost aggregation 40 | self.aggregator_type = self.cfg.model.cost_processor.cost_aggregator.type 41 | self.aggregator = nn.ModuleDict() 42 | for st in self.stage: 43 | self.aggregator[st] = AnyNetAggregator( 44 | in_planes=self.cfg.model.cost_processor.cost_aggregator.in_planes[st], 45 | agg_planes=self.cfg.model.cost_processor.cost_aggregator.agg_planes[st], 46 | num=self.cfg.model.cost_processor.cost_aggregator.num, 47 | batch_norm=self.batch_norm, 48 | ) 49 | 50 | def forward(self, stage, left, right, disp=None): 51 | B, C, H, W = left.shape 52 | # construct the raw cost volume 53 | 54 | end_disp = self.start_disp[stage] + self.max_disp[stage] - 1 55 | 56 | # disparity sample number 57 | D = (self.max_disp[stage] + self.dilation[stage] - 1) // self.dilation[stage] 58 | 59 | # generate disparity samples, in [B, D, H, W] layout 60 | disp_sample = torch.linspace(self.start_disp[stage], end_disp, D) 61 | disp_sample = disp_sample.view(1, D, 1, 1).expand(B, D, H, W).to(left.device).float() 62 | 63 | # if an initial disparity was estimated, use it for warping 64 | if disp is not None: 65 | # up-sample disparity map to the size of left 66 | H, W = left.shape[-2:] 67 | scale = W / disp.shape[-1] 68 | disp = F.interpolate(disp * scale, size=(H, W), mode='bilinear', align_corners=False) 69 | # shift the disparity sample to be centered at the given disparity map 70 | disp_sample = disp_sample + disp 71 | 72 | # [B, C, D, H, W] 73 | raw_cost = fast_dif_fms(left, right, disp_sample=disp_sample) 74 | 75 | # list [[B, D, H, W]] 76 | cost = self.aggregator[stage](raw_cost) 77 | 78 | return cost 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_cost_processor 2 | --------------------------------------------------------------------------------
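# NOTE (editor): a worked example of the disparity sampling arithmetic in
# AnyNetProcessor.forward above; the numbers are illustrative, not from a
# shipped config. With start_disp=0, max_disp=12 and dilation=1 for one stage:
#
#     end_disp = 0 + 12 - 1 = 11
#     D = (12 + 1 - 1) // 1 = 12
#     disp_sample = linspace(0, 11, 12) -> 0, 1, ..., 11
#
# When a previous-stage disparity map is given, the same D samples are simply
# re-centered around it (disp_sample + disp) after up-sampling, so later
# stages only search a narrow band around the current estimate.
/dmb/modeling/stereo/cost_processors/aggregators/AcfNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from dmb.modeling.stereo.layers.basic_layers import conv3d_bn, conv3d_bn_relu, conv_bn_relu, conv_bn 5 | from dmb.modeling.stereo.cost_processors.utils.hourglass import Hourglass 6 | 7 | 8 | class AcfAggregator(nn.Module): 9 | """ 10 | Args: 11 | max_disp (int): max disparity 12 | in_planes (int): the channels of raw cost volume 13 | batch_norm (bool): whether use batch normalization layer, default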
True 14 | 15 | Inputs: 16 | raw_cost (Tensor): raw cost volume, 17 | in [BatchSize, Channels, MaxDisparity//4, Height//4, Width//4] layout 18 | 19 | Outputs: 20 | cost_volume (tuple of Tensor): cost volume 21 | in [BatchSize, MaxDisparity, Height, Width] layout 22 | """ 23 | 24 | def __init__(self, max_disp, in_planes=64, batch_norm=True): 25 | super(AcfAggregator, self).__init__() 26 | self.max_disp = max_disp 27 | self.in_planes = in_planes 28 | self.batch_norm = batch_norm 29 | 30 | self.dres0 = nn.Sequential( 31 | conv3d_bn_relu(batch_norm, self.in_planes, 32, 3, 1, 1), 32 | conv3d_bn_relu(batch_norm, 32, 32, 3, 1, 1), 33 | ) 34 | self.dres1 = nn.Sequential( 35 | conv3d_bn_relu(batch_norm, 32, 32, 3, 1, 1), 36 | conv3d_bn(batch_norm, 32, 32, 3, 1, 1) 37 | ) 38 | self.dres2 = Hourglass(in_planes=32, batch_norm=batch_norm) 39 | self.dres3 = Hourglass(in_planes=32, batch_norm=batch_norm) 40 | self.dres4 = Hourglass(in_planes=32, batch_norm=batch_norm) 41 | 42 | self.classif1 = nn.Sequential( 43 | conv3d_bn_relu(batch_norm, 32, 32, 3, 1, 1), 44 | nn.Conv3d(32, 1, kernel_size=3, stride=1, padding=1, bias=False), 45 | ) 46 | self.classif2 = nn.Sequential( 47 | conv3d_bn_relu(batch_norm, 32, 32, 3, 1, 1), 48 | nn.Conv3d(32, 1, kernel_size=3, stride=1, padding=1, bias=False), 49 | ) 50 | self.classif3 = nn.Sequential( 51 | conv3d_bn_relu(batch_norm, 32, 32, 3, 1, 1), 52 | nn.Conv3d(32, 1, kernel_size=3, stride=1, padding=1, bias=False) 53 | ) 54 | 55 | self.deconv1 = nn.ConvTranspose3d(1, 1, 8, 4, 2, bias=False) 56 | self.deconv2 = nn.ConvTranspose3d(1, 1, 8, 4, 2, bias=False) 57 | self.deconv3 = nn.ConvTranspose3d(1, 1, 8, 4, 2, bias=False) 58 | 59 | def forward(self, raw_cost): 60 | B, C, D, H, W = raw_cost.shape 61 | # concat_fms: (BatchSize, Channels*2, MaxDisparity/4, Height/4, Width/4) 62 | cost0 = self.dres0(raw_cost) 63 | cost0 = self.dres1(cost0) + cost0 64 | 65 | out1, pre1, post1 = self.dres2(cost0, None, None) 66 | out1 = out1 + cost0 67 | 68 | out2, pre2, post2 = self.dres3(out1, pre1, post1) 69 | out2 = out2 + cost0 70 | 71 | out3, pre3, post3 = self.dres4(out2, pre2, post2) 72 | out3 = out3 + cost0 73 | 74 | cost1 = self.classif1(out1) 75 | cost2 = self.classif2(out2) + cost1 76 | cost3 = self.classif3(out3) + cost2 77 | 78 | # (BatchSize, 1, MaxDisparity, Height, Width) 79 | full_h, full_w = H * 4, W * 4 80 | 81 | cost1 = self.deconv1(cost1, [self.max_disp, full_h, full_w]) 82 | cost2 = self.deconv2(cost2, [self.max_disp, full_h, full_w]) 83 | cost3 = self.deconv3(cost3, [self.max_disp, full_h, full_w]) 84 | 85 | # (BatchSize, MaxDisparity, Height, Width) 86 | cost1 = torch.squeeze(cost1, 1) 87 | cost2 = torch.squeeze(cost2, 1) 88 | cost3 = torch.squeeze(cost3, 1) 89 | 90 | return [cost3, cost2, cost1] 91 | 92 | 93 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/aggregators/AnyNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from dmb.modeling.stereo.layers.basic_layers import bn_relu_conv3d 5 | 6 | 7 | class AnyNetAggregator(nn.Module): 8 | """ 9 | Args: 10 | in_planes (int): the channels of raw cost volume 11 | agg_planes (int): the channels of middle 3d convolution layer 12 | num, (int): the number of middle 3d convolution layer 13 | batch_norm (bool): whether use batch normalization layer, default True 14 | 15 | Inputs: 16 | raw_cost (Tensor): raw cost volume, 17 | in [BatchSize, in_planes, MaxDisparity, Height, 
Width] layout 18 | 19 | Outputs: 20 | cost_volume (tuple of Tensor): cost volume 21 | in [BatchSize, MaxDisparity, Height, Width] layout 22 | """ 23 | 24 | def __init__(self, in_planes=1, agg_planes=4, num=4, batch_norm=True): 25 | super(AnyNetAggregator, self).__init__() 26 | self.in_planes = in_planes 27 | self.agg_planes = agg_planes 28 | self.num = num 29 | self.batch_norm = batch_norm 30 | 31 | self.agg_list = [bn_relu_conv3d(batch_norm, in_planes, agg_planes, kernel_size=3, 32 | stride=1, padding=1, dilation=1, bias=True)] 33 | self.agg_list += [bn_relu_conv3d(batch_norm, agg_planes, agg_planes, kernel_size=3, 34 | stride=1, padding=1, dilation=1, bias=True) for _ in range(num)] 35 | self.agg_list += [bn_relu_conv3d(batch_norm, agg_planes, 1, kernel_size=3, 36 | stride=1, padding=1, dilation=1, bias=True)] 37 | self.agg = nn.Sequential(*self.agg_list) 38 | 39 | def forward(self, raw_cost): 40 | # in: [B, in_planes, D, H, W], out: [B, 1, D, H, W] 41 | cost = self.agg(raw_cost) 42 | # [B, D, H, W] 43 | cost = cost.squeeze(dim=1) 44 | 45 | return [cost] 46 | 47 | 48 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/aggregators/DeepPruner.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from dmb.modeling.stereo.layers.basic_layers import conv3d_bn, conv3d_bn_relu 5 | from dmb.modeling.stereo.cost_processors.utils.hw_hourglass import HWHourglass 6 | 7 | 8 | class DeepPrunerAggregator(nn.Module): 9 | """ 10 | Args: 11 | in_planes (int): the channels of raw cost volume 12 | hourglass_in_planes (int): the channels of hourglass module for cost aggregation 13 | batch_norm (bool): whether use batch normalization layer, default True 14 | 15 | Inputs: 16 | raw_cost (Tensor): raw cost volume, in [BatchSize, in_planes, MaxDisparity, Height, Width] layout 17 | 18 | Outputs: 19 | cost_volume (tuple of Tensor): cost volume 20 | in [BatchSize, MaxDisparity, Height, Width] layout 21 | """ 22 | 23 | def __init__(self, in_planes, hourglass_in_planes, batch_norm=True): 24 | super(DeepPrunerAggregator, self).__init__() 25 | self.in_planes = in_planes 26 | self.hourglass_in_planes = hourglass_in_planes 27 | self.batch_norm = batch_norm 28 | 29 | self.dres0 = nn.Sequential( 30 | conv3d_bn_relu(batch_norm, in_planes, 64, kernel_size=3, stride=1, padding=1, bias=False), 31 | conv3d_bn_relu(batch_norm, 64, 32, kernel_size=3, stride=1, padding=1, bias=False), 32 | ) 33 | 34 | self.dres1 = nn.Sequential( 35 | conv3d_bn_relu(batch_norm, 32, 32, kernel_size=3, stride=1, padding=1, bias=False), 36 | conv3d_bn_relu(batch_norm, 32, hourglass_in_planes, kernel_size=3, stride=1, padding=1, bias=False), 37 | ) 38 | 39 | self.dres2 = HWHourglass(hourglass_in_planes, batch_norm=batch_norm) 40 | 41 | self.classify = nn.Sequential( 42 | conv3d_bn_relu(batch_norm, hourglass_in_planes, hourglass_in_planes * 2, 43 | kernel_size=3, stride=1, padding=1, bias=False), 44 | nn.Conv3d(hourglass_in_planes * 2, 1, kernel_size=3, stride=1, padding=1, bias=False) 45 | ) 46 | 47 | def forward(self, raw_cost): 48 | # in: [B, in_planes, D, H, W], out: [B, 64, D, H, W] 49 | cost = self.dres0(raw_cost) 50 | # in: [B, 64, D, H, W], out: [B, hourglass_in_planes, D, H, W] 51 | cost = self.dres1(cost) 52 | 53 | # in: [B, hourglass_in_planes, D, H, W], out: [B, hourglass_in_planes, D, H, W] 54 | cost = self.dres2(cost) + cost 55 | 56 | # in: [B, hourglass_in_planes, D, H, W], mid: [B, 1, D, H, 
W], out: [B, D, H, W] 57 | cost = self.classify(cost).squeeze(1) 58 | 59 | return [cost] 60 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/aggregators/PSMNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv3d_bn, conv3d_bn_relu 6 | from dmb.modeling.stereo.cost_processors.utils.hourglass import Hourglass 7 | 8 | 9 | class PSMAggregator(nn.Module): 10 | """ 11 | Args: 12 | max_disp (int): max disparity 13 | in_planes (int): the channels of raw cost volume 14 | batch_norm (bool): whether use batch normalization layer, default True 15 | 16 | Inputs: 17 | raw_cost (Tensor): concatenation-based cost volume without further processing, 18 | in [BatchSize, in_planes, MaxDisparity//4, Height//4, Width//4] layout 19 | Outputs: 20 | cost_volume (tuple of Tensor): cost volume 21 | in [BatchSize, MaxDisparity, Height, Width] layout 22 | """ 23 | 24 | def __init__(self, max_disp, in_planes=64, batch_norm=True): 25 | super(PSMAggregator, self).__init__() 26 | self.max_disp = max_disp 27 | self.in_planes = in_planes 28 | self.batch_norm = batch_norm 29 | 30 | self.dres0 = nn.Sequential( 31 | conv3d_bn_relu(batch_norm, self.in_planes, 32, 3, 1, 1, bias=False), 32 | conv3d_bn_relu(batch_norm, 32, 32, 3, 1, 1, bias=False), 33 | ) 34 | self.dres1 = nn.Sequential( 35 | conv3d_bn_relu(batch_norm, 32, 32, 3, 1, 1, bias=False), 36 | conv3d_bn(batch_norm, 32, 32, 3, 1, 1, bias=False) 37 | ) 38 | self.dres2 = Hourglass(in_planes=32, batch_norm=batch_norm) 39 | self.dres3 = Hourglass(in_planes=32, batch_norm=batch_norm) 40 | self.dres4 = Hourglass(in_planes=32, batch_norm=batch_norm) 41 | 42 | self.classif1 = nn.Sequential( 43 | conv3d_bn_relu(batch_norm, 32, 32, 3, 1, 1, bias=False), 44 | nn.Conv3d(32, 1, kernel_size=3, stride=1, padding=1, bias=False), 45 | ) 46 | self.classif2 = nn.Sequential( 47 | conv3d_bn_relu(batch_norm, 32, 32, 3, 1, 1, bias=False), 48 | nn.Conv3d(32, 1, kernel_size=3, stride=1, padding=1, bias=False), 49 | ) 50 | self.classif3 = nn.Sequential( 51 | conv3d_bn_relu(batch_norm, 32, 32, 3, 1, 1, bias=False), 52 | nn.Conv3d(32, 1, kernel_size=3, stride=1, padding=1, bias=False) 53 | ) 54 | 55 | def forward(self, raw_cost): 56 | B, C, D, H, W = raw_cost.shape 57 | # raw_cost: (BatchSize, Channels*2, MaxDisparity/4, Height/4, Width/4) 58 | cost0 = self.dres0(raw_cost) 59 | cost0 = self.dres1(cost0) + cost0 60 | 61 | out1, pre1, post1 = self.dres2(cost0, None, None) 62 | out1 = out1 + cost0 63 | 64 | out2, pre2, post2 = self.dres3(out1, pre1, post1) 65 | out2 = out2 + cost0 66 | 67 | out3, pre3, post3 = self.dres4(out2, pre2, post2) 68 | out3 = out3 + cost0 69 | 70 | cost1 = self.classif1(out1) 71 | cost2 = self.classif2(out2) + cost1 72 | cost3 = self.classif3(out3) + cost2 73 | 74 | # (BatchSize, 1, max_disp, Height, Width) 75 | full_h, full_w = H * 4, W * 4 76 | align_corners = True 77 | cost1 = F.interpolate( 78 | cost1, [self.max_disp, full_h, full_w], 79 | mode='trilinear', align_corners=align_corners 80 | ) 81 | cost2 = F.interpolate( 82 | cost2, [self.max_disp, full_h, full_w], 83 | mode='trilinear', align_corners=align_corners 84 | ) 85 | cost3 = F.interpolate( 86 | cost3, [self.max_disp, full_h, full_w], 87 | mode='trilinear', align_corners=align_corners 88 | ) 89 | 90 | # (BatchSize, max_disp, Height, Width) 91 | cost1 = 
torch.squeeze(cost1, 1) 92 | cost2 = torch.squeeze(cost2, 1) 93 | cost3 = torch.squeeze(cost3, 1) 94 | 95 | return [cost3, cost2, cost1] 96 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/aggregators/StereoNet.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from dmb.modeling.stereo.layers.basic_layers import conv3d_bn, conv3d_bn_relu 7 | 8 | 9 | class StereoNetAggregator(nn.Module): 10 | """ 11 | Args: 12 | max_disp (int): max disparity 13 | in_planes (int): the channels of raw cost volume 14 | batch_norm (bool): whether use batch normalization layer, default True 15 | 16 | Inputs: 17 | raw_cost (Tensor): difference-based cost volume without further processing, 18 | in [BatchSize, in_planes, max_disp//8, Height//8, Width//8] layout (default) 19 | or in [BatchSize, in_planes, max_disp//16, Height//16, Width//16] layout 20 | 21 | Outputs: 22 | cost_volume (tuple of Tensor): cost volume 23 | in [BatchSize, max_disp//8, Height//8, Width//8] layout (default) 24 | or in [BatchSize, in_planes, max_disp//16, Height//16, Width//16] layout 25 | """ 26 | 27 | def __init__(self, max_disp, in_planes=32, batch_norm=True, num=4): 28 | super(StereoNetAggregator, self).__init__() 29 | self.max_disp = max_disp 30 | self.in_planes = in_planes 31 | self.batch_norm = batch_norm 32 | self.num = num 33 | 34 | self.classify = nn.ModuleList([ 35 | conv3d_bn_relu(self.batch_norm, in_planes, 32, kernel_size=3, 36 | stride=1, padding=1, dilation=1, bias=True) for _ in range(self.num) 37 | ]) 38 | 39 | self.lastconv = nn.Conv3d(32, 1, kernel_size=3, stride=1, padding=1, bias=True) 40 | 41 | 42 | def forward(self, raw_cost): 43 | # default down-sample to 1/8 resolution, it also can be 1/16 44 | # raw_cost: (BatchSize, Channels, MaxDisparity/8, Height/8, Width/8) 45 | for i in range(self.num): 46 | raw_cost = self.classify[i](raw_cost) 47 | 48 | # cost: (BatchSize, 1, MaxDisparity/8, Height/8, Width/8) 49 | cost = self.lastconv(raw_cost) 50 | 51 | # (BatchSize, MaxDisparity/8, Height/8, Width/8) 52 | cost = torch.squeeze(cost, 1) 53 | 54 | 55 | return [cost] 56 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/aggregators/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_cost_aggregator 2 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/aggregators/builder.py: -------------------------------------------------------------------------------- 1 | from .GCNet import GCAggregator 2 | from .PSMNet import PSMAggregator 3 | from .AcfNet import AcfAggregator 4 | from .StereoNet import StereoNetAggregator 5 | from .DeepPruner import DeepPrunerAggregator 6 | from .AnyNet import AnyNetAggregator 7 | 8 | AGGREGATORS = { 9 | "GCNet": GCAggregator, 10 | "PSMNet": PSMAggregator, 11 | "AcfNet": AcfAggregator, 12 | 'StereoNet': StereoNetAggregator, 13 | 'DeepPruner': DeepPrunerAggregator, 14 | 'AnyNet': AnyNetAggregator, 15 | } 16 | 17 | 18 | def build_cost_aggregator(cfg): 19 | agg_type = cfg.model.cost_processor.cost_aggregator.type 20 | assert agg_type in AGGREGATORS, "cost_aggregator type not found, excepted: {}," \ 21 | "but got {}".format(AGGREGATORS.keys(), agg_type) 22 | 23 | default_args = 
cfg.model.cost_processor.cost_aggregator.copy() 24 | default_args.pop('type') 25 | default_args.update(batch_norm=cfg.model.batch_norm) 26 | 27 | aggregator = AGGREGATORS[agg_type](**default_args) 28 | 29 | return aggregator 30 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/builder.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from .utils.cat_fms import CAT_FUNCS 4 | from .utils.dif_fms import DIF_FUNCS 5 | from .utils.correlation1d_cost import COR_FUNCS 6 | from .aggregators import build_cost_aggregator 7 | 8 | from .DeepPruner import DeepPrunerProcessor 9 | from .AnyNet import AnyNetProcessor 10 | 11 | 12 | class CostProcessor(nn.Module): 13 | 14 | def __init__(self): 15 | super(CostProcessor, self).__init__() 16 | 17 | def forward(self, *input): 18 | raise NotImplementedError 19 | 20 | # Concatenate left and right feature to form cost volume 21 | class CatCostProcessor(CostProcessor): 22 | 23 | def __init__(self, cfg): 24 | super(CatCostProcessor, self).__init__() 25 | cat_func = cfg.model.cost_processor.cost_computation.get('type', 'default') 26 | self.cat_func = CAT_FUNCS[cat_func] 27 | 28 | self.default_args = cfg.model.cost_processor.cost_computation.copy() 29 | self.default_args.pop('type') 30 | 31 | self.aggregator = build_cost_aggregator(cfg) 32 | 33 | def forward(self, ref_fms, tgt_fms, disp_sample=None): 34 | # 1. build raw cost by concat 35 | cat_cost = self.cat_func(ref_fms, tgt_fms, disp_sample=disp_sample, **self.default_args) 36 | 37 | # 2. aggregate cost by 3D-hourglass 38 | costs = self.aggregator(cat_cost) 39 | 40 | return costs 41 | 42 | 43 | # Use the difference between left and right feature to form cost volume 44 | class DifCostProcessor(CostProcessor): 45 | 46 | def __init__(self, cfg): 47 | super(DifCostProcessor, self).__init__() 48 | dif_func = cfg.model.cost_processor.cost_computation.get('type', 'default') 49 | self.dif_func = DIF_FUNCS[dif_func] 50 | 51 | self.default_args = cfg.model.cost_processor.cost_computation.copy() 52 | self.default_args.pop('type') 53 | 54 | self.aggregator = build_cost_aggregator(cfg) 55 | 56 | def forward(self, ref_fms, tgt_fms, disp_sample=None): 57 | # 1. build raw cost by concat 58 | cat_cost = self.dif_func(ref_fms, tgt_fms, disp_sample=disp_sample, **self.default_args) 59 | 60 | # 2. aggregate cost by 3D-hourglass 61 | costs = self.aggregator(cat_cost) 62 | 63 | return costs 64 | 65 | 66 | # Use the correlation between left and right feature to form cost volume 67 | class CorCostProcessor(CostProcessor): 68 | 69 | def __init__(self, cfg): 70 | super(CorCostProcessor, self).__init__() 71 | cor_func = cfg.model.cost_processor.cost_computation.get('type', 'default') 72 | self.cor_func = COR_FUNCS[cor_func] 73 | 74 | self.default_args = cfg.model.cost_processor.cost_computation.copy() 75 | self.default_args.pop('type') 76 | 77 | self.aggregator = build_cost_aggregator(cfg) 78 | 79 | def forward(self, ref_fms, tgt_fms, disp_sample=None): 80 | # 1. build raw cost by correlation 81 | cor_cost = self.cor_func(ref_fms, tgt_fms, disp_sample=disp_sample, **self.default_args) 82 | 83 | # 2. 
aggregate cost by 2D-hourglass 84 | costs = self.aggregator(cor_cost) 85 | 86 | return costs 87 | 88 | 89 | PROCESSORS = { 90 | 'Difference': DifCostProcessor, 91 | 'Concatenation': CatCostProcessor, 92 | 'Correlation': CorCostProcessor, 93 | 'DeepPruner': DeepPrunerProcessor, 94 | 'AnyNet': AnyNetProcessor, 95 | } 96 | 97 | def build_cost_processor(cfg): 98 | proc_type = cfg.model.cost_processor.type 99 | assert proc_type in PROCESSORS, "cost_processor type not found, expected: {}," \ 100 | "but got {}".format(PROCESSORS.keys(), proc_type) 101 | 102 | args = dict( 103 | cfg=cfg, 104 | ) 105 | processor = PROCESSORS[proc_type](**args) 106 | 107 | return processor 108 | 109 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/dmb/modeling/stereo/cost_processors/utils/__init__.py -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/utils/cat_fms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | from dmb.modeling.stereo.layers.inverse_warp_3d import inverse_warp_3d 5 | 6 | 7 | def cat_fms(reference_fm, target_fm, max_disp=192, start_disp=0, dilation=1, disp_sample=None): 8 | """ 9 | Concat left and right in Channel dimension to form the raw cost volume. 10 | Args: 11 | max_disp, (int): under the scale of feature used, 12 | often equals to (end disp - start disp + 1), the maximum searching range of disparity 13 | start_disp (int): the start searching disparity index, usually 0 14 | dilation (int): the step between near disparity index 15 | disp_sample (Tensor, optional): pre-given disparity samples, unused here (see fast_cat_fms) 16 | 17 | Inputs: 18 | reference_fm, (Tensor): reference feature, i.e.
right image feature, in [BatchSize, Channel, Height, Width] layout 20 | 21 | Output: 22 | concat_fm, (Tensor): the formed cost volume, in [BatchSize, Channel*2, disp_sample_number, Height, Width] layout 23 | 24 | """ 25 | device = reference_fm.device 26 | N, C, H, W = reference_fm.shape 27 | 28 | end_disp = start_disp + max_disp - 1 29 | disp_sample_number = (max_disp + dilation - 1) // dilation 30 | disp_index = torch.linspace(start_disp, end_disp, disp_sample_number) 31 | 32 | concat_fm = torch.zeros(N, C * 2, disp_sample_number, H, W).to(device) 33 | idx = 0 34 | for i in disp_index: 35 | i = int(i) # convert torch.Tensor to int, so that it can be index 36 | if i > 0: 37 | concat_fm[:, :C, idx, :, i:] = reference_fm[:, :, :, i:] 38 | concat_fm[:, C:, idx, :, i:] = target_fm[:, :, :, :-i] 39 | elif i == 0: 40 | concat_fm[:, :C, idx, :, :] = reference_fm 41 | concat_fm[:, C:, idx, :, :] = target_fm 42 | else: 43 | concat_fm[:, :C, idx, :, :i] = reference_fm[:, :, :, :i] 44 | concat_fm[:, C:, idx, :, :i] = target_fm[:, :, :, abs(i):] 45 | idx = idx + 1 46 | 47 | concat_fm = concat_fm.contiguous() 48 | return concat_fm 49 | 50 | 51 | def fast_cat_fms(reference_fm, target_fm, max_disp=192, start_disp=0, dilation=1, disp_sample=None): 52 | device = reference_fm.device 53 | B, C, H, W = reference_fm.shape 54 | 55 | if disp_sample is None: 56 | end_disp = start_disp + max_disp - 1 57 | 58 | disp_sample_number = (max_disp + dilation - 1) // dilation 59 | D = disp_sample_number 60 | 61 | # generate disparity samples, in [B,D, H, W] layout 62 | disp_sample = torch.linspace(start_disp, end_disp, D) 63 | disp_sample = disp_sample.view(1, D, 1, 1).expand(B, D, H, W).to(device).float() 64 | 65 | else: # direct provide disparity samples 66 | # the number of disparity samples 67 | D = disp_sample.shape[1] 68 | 69 | # expand D dimension 70 | concat_reference_fm = reference_fm.unsqueeze(2).expand(B, C, D, H, W) 71 | concat_target_fm = target_fm.unsqueeze(2).expand(B, C, D, H, W) 72 | 73 | # shift target feature according to disparity samples 74 | concat_target_fm = inverse_warp_3d(concat_target_fm, -disp_sample, padding_mode='zeros') 75 | 76 | # mask out features in reference 77 | concat_reference_fm = concat_reference_fm * (concat_target_fm > 0).float() 78 | 79 | # [B, 2C, D, H, W) 80 | concat_fm = torch.cat((concat_reference_fm, concat_target_fm), dim=1) 81 | 82 | return concat_fm 83 | 84 | 85 | CAT_FUNCS = dict( 86 | default=cat_fms, 87 | fast_mode=fast_cat_fms, 88 | ) 89 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/utils/correlation1d_cost.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from spatial_correlation_sampler import SpatialCorrelationSampler 6 | 7 | def correlation1d_cost(reference_fm, target_fm, max_disp=192, start_disp=0, dilation=1, disp_sample=None, 8 | kernel_size=1, stride=1, padding=0, dilation_patch=1,): 9 | # for a pixel of left image at (x, y), it will calculates correlation cost volume 10 | # with pixel of right image at (xr, y), where xr in [x-max_disp, x+max_disp] 11 | # but we only need the left half part, i.e., [x-max_disp, 0] 12 | correlation_sampler = SpatialCorrelationSampler(patch_size=(1, max_disp * 2 - 1), 13 | kernel_size=kernel_size, 14 | stride=stride, padding=padding, 15 | dilation_patch=dilation_patch) 16 | # [B, 1, max_disp*2-1, H, W] 17 | out = 
correlation_sampler(reference_fm, target_fm) 18 | 19 | # [B, max_disp*2-1, H, W] 20 | out = out.squeeze(1) 21 | 22 | # [B, max_disp, H, W], grab the left half of the searching range 23 | out = out[:, :max_disp, :, :] 24 | 25 | cost = F.leaky_relu(out, negative_slope=0.1, inplace=True) 26 | 27 | return cost 28 | 29 | COR_FUNCS = dict( 30 | default=correlation1d_cost, 31 | ) 32 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/utils/cost_norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | eps = 1e-5 5 | 6 | 7 | class _CostVolumeNorm(nn.Module): 8 | """ 9 | Normalize Cost Volume 10 | Args: 11 | dim (int): which dim to apply normalization operation, default dim is for the cost dim. 12 | affine (bool): whether the parameters are learnable, default is True 13 | weight (float): weight for cost re-range 14 | bias (float): bias for cost 15 | Shape: 16 | - Input: :math:`(N, *)` 17 | - Output: :math:`(N, *)` (same shape as input) 18 | """ 19 | 20 | def __init__(self, dim=1, affine=True, weight=1, bias=0): 21 | super(_CostVolumeNorm, self).__init__() 22 | self.dim = dim 23 | self.affine = affine 24 | if self.affine: 25 | self.weight = nn.Parameter(data=torch.Tensor(1), requires_grad=True) 26 | self.bias = nn.Parameter(data=torch.Tensor(1), requires_grad=True) 27 | else: 28 | self.weight = nn.Parameter(data=torch.Tensor(1), requires_grad=False) 29 | self.bias = nn.Parameter(data=torch.Tensor(1), requires_grad=False) 30 | 31 | # init weight and bias 32 | self.weight.data.fill_(weight) 33 | self.bias.data.fill_(bias) 34 | 35 | def forward(self, input): 36 | raise NotImplementedError 37 | 38 | 39 | class RangeNorm(_CostVolumeNorm): 40 | def __init__(self, dim=1, affine=True, weight=1, bias=0): 41 | super(RangeNorm, self).__init__(dim=dim, affine=affine, weight=weight, bias=bias) 42 | 43 | def forward(self, input): 44 | # compute min value, used as the shift 45 | mean = input.min(dim=self.dim, keepdim=True)[0] 46 | # compute the range (max - min) 47 | var = input.max(dim=self.dim, keepdim=True)[0] - input.min(dim=self.dim, keepdim=True)[0] 48 | # normalize 49 | normalized_input = (input - mean) / (var + eps) 50 | # apply weight and bias 51 | output = normalized_input * self.weight + self.bias 52 | 53 | return output 54 | 55 | 56 | class VarNorm(_CostVolumeNorm): 57 | def __init__(self, dim=1, affine=True, weight=1, bias=0): 58 | super(VarNorm, self).__init__(dim=dim, affine=affine, weight=weight, bias=bias) 59 | 60 | def forward(self, input): 61 | # compute mean value 62 | mean = input.mean(dim=self.dim, keepdim=True) 63 | # compute var value 64 | var = input.var(dim=self.dim, keepdim=True) 65 | # normalize 66 | normalized_input = (input - mean).abs() / (var + eps) 67 | # apply weight and bias 68 | output = normalized_input * self.weight + self.bias 69 | 70 | return output 71 | 72 | 73 | class StdNorm(_CostVolumeNorm): 74 | def __init__(self, dim=1, affine=True, weight=1, bias=0): 75 | super(StdNorm, self).__init__(dim=dim, affine=affine, weight=weight, bias=bias) 76 | 77 | def forward(self, input): 78 | # compute mean value 79 | mean = input.mean(dim=self.dim, keepdim=True) 80 | # compute std value 81 | var = input.std(dim=self.dim, keepdim=True) 82 | # normalize 83 | normalized_input = (input - mean).abs() / (var + eps) 84 | # apply weight and bias 85 | output = normalized_input * self.weight + self.bias 86 | 87 | return output 88 | 89 |
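# NOTE (editor): a quick sanity-check sketch for RangeNorm above (the tensor
# shape is an assumption): with the default weight=1, bias=0 it maps each
# pixel's costs along the disparity dimension into roughly [0, 1]:
#
#     cost = torch.randn(2, 48, 64, 128)        # [B, D, H, W]
#     out = RangeNorm(dim=1, affine=False)(cost)
#     # per pixel: (cost - min) / (max - min + eps), so min -> 0, max -> ~1
#
90 | class SigmoidNorm(_CostVolumeNorm): 91 | def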
__init__(self, dim=1, affine=True, weight=1, bias=0): 92 | super(SigmoidNorm, self).__init__(dim=dim, affine=affine, weight=weight, bias=bias) 93 | 94 | def forward(self, input): 95 | # normalize 96 | normalized_input = torch.sigmoid(input) 97 | # apply weight and bias 98 | output = normalized_input * self.weight + self.bias 99 | 100 | return output 101 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/utils/dif_fms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | from dmb.modeling.stereo.layers.inverse_warp_3d import inverse_warp_3d 5 | 6 | 7 | def dif_fms(reference_fm, target_fm, max_disp=192, start_disp=0, dilation=1, disp_sample=None, 8 | normalize=False, p=1.0): 9 | """ 10 | Concat left and right in Channel dimension to form the raw cost volume. 11 | Args: 12 | max_disp, (int): under the scale of feature used, 13 | often equals to (end disp - start disp + 1), the maximum searching range of disparity 14 | start_disp (int): the start searching disparity index, usually be 0 15 | dilation (int): the step between near disparity index 16 | dilation (int): the step between near disparity index 17 | 18 | Inputs: 19 | reference_fm, (Tensor): reference feature, i.e. left image feature, in [BatchSize, Channel, Height, Width] layout 20 | target_fm, (Tensor): target feature, i.e. right image feature, in [BatchSize, Channel, Height, Width] layout 21 | 22 | Output: 23 | dif_fm, (Tensor): the formed cost volume, in [BatchSize, Channel, disp_sample_number, Height, Width] layout 24 | 25 | """ 26 | device = reference_fm.device 27 | N, C, H, W = reference_fm.shape 28 | 29 | end_disp = start_disp + max_disp - 1 30 | disp_sample_number = (max_disp + dilation - 1) // dilation 31 | disp_index = torch.linspace(start_disp, end_disp, disp_sample_number) 32 | 33 | dif_fm = torch.zeros(N, C, disp_sample_number, H, W).to(device) 34 | idx = 0 35 | for i in disp_index: 36 | i = int(i) # convert torch.Tensor to int, so that it can be index 37 | if i > 0: 38 | dif_fm[:, :, idx, :, i:] = reference_fm[:, :, :, i:] - target_fm[:, :, :, :-i] 39 | elif i == 0: 40 | dif_fm[:, :, idx, :, :] = reference_fm - target_fm 41 | else: 42 | dif_fm[:, :, idx, :, :i] = reference_fm[:, :, :, :i] - target_fm[:, :, :, abs(i):] 43 | idx = idx + 1 44 | 45 | dif_fm = dif_fm.contiguous() 46 | return dif_fm 47 | 48 | 49 | def fast_dif_fms(reference_fm, target_fm, max_disp=192, start_disp=0, dilation=1, disp_sample=None, 50 | normalize=False, p=1.0,): 51 | device = reference_fm.device 52 | B, C, H, W = reference_fm.shape 53 | 54 | if disp_sample is None: 55 | end_disp = start_disp + max_disp - 1 56 | 57 | disp_sample_number = (max_disp + dilation - 1) // dilation 58 | D = disp_sample_number 59 | 60 | # generate disparity samples, in [B,D, H, W] layout 61 | disp_sample = torch.linspace(start_disp, end_disp, D) 62 | disp_sample = disp_sample.view(1, D, 1, 1).expand(B, D, H, W).to(device).float() 63 | 64 | else: # direct provide disparity samples 65 | # the number of disparity samples 66 | D = disp_sample.shape[1] 67 | 68 | # expand D dimension 69 | dif_reference_fm = reference_fm.unsqueeze(2).expand(B, C, D, H, W) 70 | dif_target_fm = target_fm.unsqueeze(2).expand(B, C, D, H, W) 71 | 72 | # shift reference feature map with disparity through grid sample 73 | # shift target feature according to disparity samples 74 | dif_target_fm = inverse_warp_3d(dif_target_fm, -disp_sample, 
padding_mode='zeros') 75 | 76 | # mask out features in reference 77 | dif_reference_fm = dif_reference_fm * (dif_target_fm > 0).type_as(dif_reference_fm) 78 | 79 | # [B, C, D, H, W) 80 | dif_fm = dif_reference_fm - dif_target_fm 81 | 82 | if normalize: 83 | # [B, D, H, W] 84 | dif_fm = torch.norm(dif_fm, p=p, dim=1, keepdim=False) 85 | 86 | return dif_fm 87 | 88 | 89 | DIF_FUNCS = dict( 90 | default=dif_fms, 91 | fast_mode=fast_dif_fms, 92 | ) 93 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/utils/hourglass.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv3d_bn, conv3d_bn_relu, conv_bn_relu, deconv3d_bn 6 | 7 | 8 | class Hourglass(nn.Module): 9 | """ 10 | An implementation of hourglass module proposed in PSMNet. 11 | Args: 12 | in_planes (int): the channels of raw cost volume 13 | batch_norm (bool): whether use batch normalization layer, 14 | default True 15 | Inputs: 16 | x, (Tensor): cost volume 17 | in [BatchSize, in_planes, MaxDisparity, Height, Width] layout 18 | presqu, (optional, Tensor): cost volume 19 | in [BatchSize, in_planes * 2, MaxDisparity, Height/2, Width/2] layout 20 | postsqu, (optional, Tensor): cost volume 21 | in [BatchSize, in_planes * 2, MaxDisparity, Height/2, Width/2] layout 22 | Outputs: 23 | out, (Tensor): cost volume 24 | in [BatchSize, in_planes, MaxDisparity, Height, Width] layout 25 | pre, (optional, Tensor): cost volume 26 | in [BatchSize, in_planes * 2, MaxDisparity, Height/2, Width/2] layout 27 | post, (optional, Tensor): cost volume 28 | in [BatchSize, in_planes * 2, MaxDisparity, Height/2, Width/2] layout 29 | 30 | """ 31 | def __init__(self, in_planes, batch_norm=True): 32 | super(Hourglass, self).__init__() 33 | self.batch_norm = batch_norm 34 | 35 | self.conv1 = conv3d_bn_relu( 36 | self.batch_norm, in_planes, in_planes * 2, 37 | kernel_size=3, stride=2, padding=1, bias=False 38 | ) 39 | 40 | self.conv2 = conv3d_bn( 41 | self.batch_norm, in_planes * 2, in_planes * 2, 42 | kernel_size=3, stride=1, padding=1, bias=False 43 | ) 44 | 45 | self.conv3 = conv3d_bn_relu( 46 | self.batch_norm, in_planes * 2, in_planes * 2, 47 | kernel_size=3, stride=2, padding=1, bias=False 48 | ) 49 | self.conv4 = conv3d_bn_relu( 50 | self.batch_norm, in_planes * 2, in_planes * 2, 51 | kernel_size=3, stride=1, padding=1, bias=False 52 | ) 53 | self.conv5 = deconv3d_bn( 54 | self.batch_norm, in_planes * 2, in_planes * 2, 55 | kernel_size=3, padding=1, output_padding=1, stride=2, bias=False 56 | ) 57 | self.conv6 = deconv3d_bn( 58 | self.batch_norm, in_planes * 2, in_planes, 59 | kernel_size=3, padding=1, output_padding=1, stride=2, bias=False 60 | ) 61 | 62 | def forward(self, x, presqu=None, postsqu=None): 63 | # in: [B, C, D, H, W], out: [B, 2C, D, H/2, W/2] 64 | out = self.conv1(x) 65 | # in: [B, 2C, D, H/2, W/2], out: [B, 2C, D, H/2, W/2] 66 | pre = self.conv2(out) 67 | if postsqu is not None: 68 | pre = F.relu(pre + postsqu, inplace=True) 69 | else: 70 | pre = F.relu(pre, inplace=True) 71 | 72 | # in: [B, 2C, D, H/2, W/2], out: [B, 2C, D, H/4, W/4] 73 | out = self.conv3(pre) 74 | # in: [B, 2C, D, H/4, W/4], out: [B, 2C, D, H/4, W/4] 75 | out = self.conv4(out) 76 | 77 | # in: [B, 2C, D, H/4, W/4], out: [B, 2C, D, H/2, W/2] 78 | if presqu is not None: 79 | post = F.relu(self.conv5(out) + presqu, inplace=True) 80 | else: 81 | 
post = F.relu(self.conv5(out) + pre, inplace=True) 82 | 83 | # in: [B, 2C, D/2, H/2, W/2], out: [B, C, D, H, W] 84 | out = self.conv6(post) 85 | 86 | return out, pre, post 87 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/utils/hourglass_2d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv_bn, conv_bn_relu, deconv_bn 6 | 7 | 8 | class Hourglass2D(nn.Module): 9 | """ 10 | An implementation of the 2D hourglass module proposed in PSMNet. 11 | Args: 12 | in_planes (int): the channels of raw cost volume 13 | batch_norm (bool): whether use batch normalization layer, 14 | default True 15 | Inputs: 16 | x, (Tensor): cost volume 17 | in [BatchSize, in_planes, Height, Width] layout 18 | presqu, (optional, Tensor): cost volume 19 | in [BatchSize, in_planes * 2, Height/2, Width/2] layout 20 | postsqu, (optional, Tensor): cost volume 21 | in [BatchSize, in_planes * 2, Height/2, Width/2] layout 22 | Outputs: 23 | out, (Tensor): cost volume 24 | in [BatchSize, in_planes, Height, Width] layout 25 | pre, (optional, Tensor): cost volume 26 | in [BatchSize, in_planes * 2, Height/2, Width/2] layout 27 | post, (optional, Tensor): cost volume 28 | in [BatchSize, in_planes * 2, Height/2, Width/2] layout 29 | 30 | """ 31 | def __init__(self, in_planes, batch_norm=True): 32 | super(Hourglass2D, self).__init__() 33 | self.batch_norm = batch_norm 34 | 35 | self.conv1 = conv_bn_relu( 36 | self.batch_norm, in_planes, in_planes * 2, 37 | kernel_size=3, stride=2, padding=1, bias=False 38 | ) 39 | 40 | self.conv2 = conv_bn( 41 | self.batch_norm, in_planes * 2, in_planes * 2, 42 | kernel_size=3, stride=1, padding=1, bias=False 43 | ) 44 | 45 | self.conv3 = conv_bn_relu( 46 | self.batch_norm, in_planes * 2, in_planes * 2, 47 | kernel_size=3, stride=2, padding=1, bias=False 48 | ) 49 | self.conv4 = conv_bn_relu( 50 | self.batch_norm, in_planes * 2, in_planes * 2, 51 | kernel_size=3, stride=1, padding=1, bias=False 52 | ) 53 | self.conv5 = deconv_bn( 54 | self.batch_norm, in_planes * 2, in_planes * 2, 55 | kernel_size=3, padding=1, output_padding=1, stride=2, bias=False 56 | ) 57 | self.conv6 = deconv_bn( 58 | self.batch_norm, in_planes * 2, in_planes, 59 | kernel_size=3, padding=1, output_padding=1, stride=2, bias=False 60 | ) 61 | 62 | def forward(self, x, presqu=None, postsqu=None): 63 | # in: [B, C, H, W], out: [B, 2C, H/2, W/2] 64 | out = self.conv1(x) 65 | # in: [B, 2C, H/2, W/2], out: [B, 2C, H/2, W/2] 66 | pre = self.conv2(out) 67 | if postsqu is not None: 68 | pre = F.relu(pre + postsqu, inplace=True) 69 | else: 70 | pre = F.relu(pre, inplace=True) 71 | 72 | # in: [B, 2C, H/2, W/2], out: [B, 2C, H/4, W/4] 73 | out = self.conv3(pre) 74 | # in: [B, 2C, H/4, W/4], out: [B, 2C, H/4, W/4] 75 | out = self.conv4(out) 76 | 77 | # in: [B, 2C, H/4, W/4], out: [B, 2C, H/2, W/2] 78 | if presqu is not None: 79 | post = F.relu(self.conv5(out) + presqu, inplace=True) 80 | else: 81 | post = F.relu(self.conv5(out) + pre, inplace=True) 82 | 83 | # in: [B, 2C, H/2, W/2], out: [B, C, H, W] 84 | out = self.conv6(post) 85 | 86 | return out, pre, post 87 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/utils/hw_hourglass.py: -------------------------------------------------------------------------------- 1 |
import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv3d_bn, conv3d_bn_relu, conv_bn_relu, deconv3d_bn 6 | 7 | 8 | class HWHourglass(nn.Module): 9 | """ 10 | An implementation of the hourglass module proposed in DeepPruner. 11 | Although the input is a 3D cost volume, the stride is only imposed on the height and width dimensions. 12 | 13 | Args: 14 | in_planes (int): the channels of raw cost volume 15 | batch_norm (bool): whether use batch normalization layer, 16 | default True 17 | 18 | Inputs: 19 | raw_cost, (Tensor): raw cost volume 20 | in [BatchSize, in_planes, MaxDisparity, Height, Width] layout 21 | 22 | Outputs: 23 | cost, (Tensor): processed cost volume 24 | in [BatchSize, in_planes, MaxDisparity, Height, Width] layout 25 | 26 | """ 27 | def __init__(self, in_planes, batch_norm=True): 28 | super(HWHourglass, self).__init__() 29 | self.batch_norm = batch_norm 30 | 31 | self.conv1_a = conv3d_bn_relu( 32 | self.batch_norm, in_planes, in_planes * 2, 33 | kernel_size=3, stride=(1, 2, 2), padding=1, bias=False 34 | ) 35 | 36 | self.conv1_b = conv3d_bn_relu( 37 | self.batch_norm, in_planes * 2, in_planes * 2, 38 | kernel_size=3, stride=(1, 1, 1), padding=1, bias=False 39 | ) 40 | 41 | self.conv1_d = deconv3d_bn( 42 | self.batch_norm, in_planes * 2, in_planes, 43 | kernel_size=3, padding=1, output_padding=(0, 1, 1), stride=(1, 2, 2), bias=False 44 | ) 45 | 46 | self.conv2_a = conv3d_bn_relu( 47 | self.batch_norm, in_planes * 2, in_planes * 4, 48 | kernel_size=3, stride=(1, 2, 2), padding=1, bias=False 49 | ) 50 | 51 | self.conv2_b = conv3d_bn_relu( 52 | self.batch_norm, in_planes * 4, in_planes * 4, 53 | kernel_size=3, stride=(1, 1, 1), padding=1, bias=False 54 | ) 55 | 56 | self.conv2_d = deconv3d_bn( 57 | self.batch_norm, in_planes * 4, in_planes * 2, 58 | kernel_size=3, padding=1, output_padding=(0, 1, 1), stride=(1, 2, 2), bias=False 59 | ) 60 | 61 | self.conv3_a = conv3d_bn_relu( 62 | self.batch_norm, in_planes * 4, in_planes * 8, 63 | kernel_size=3, stride=(1, 2, 2), padding=1, bias=False 64 | ) 65 | 66 | self.conv3_b = conv3d_bn_relu( 67 | self.batch_norm, in_planes * 8, in_planes * 8, 68 | kernel_size=3, stride=(1, 1, 1), padding=1, bias=False 69 | ) 70 | 71 | self.conv3_d = deconv3d_bn( 72 | self.batch_norm, in_planes * 8, in_planes * 4, 73 | kernel_size=3, padding=1, output_padding=(0, 1, 1), stride=(1, 2, 2), bias=False 74 | ) 75 | 76 | 77 | def forward(self, raw_cost): 78 | # in: [B, C, D, H, W], out: [B, 2C, D, H/2, W/2] 79 | out1_a = self.conv1_a(raw_cost) 80 | 81 | # in: [B, 2C, D, H/2, W/2], out: [B, 2C, D, H/2, W/2] 82 | out1_b = self.conv1_b(out1_a) + out1_a 83 | 84 | # in: [B, 2C, D, H/2, W/2], out: [B, 4C, D, H/4, W/4] 85 | out2_a = self.conv2_a(out1_b) 86 | 87 | # in: [B, 4C, D, H/4, W/4], out: [B, 4C, D, H/4, W/4] 88 | out2_b = self.conv2_b(out2_a) + out2_a 89 | 90 | # in: [B, 4C, D, H/4, W/4], out: [B, 8C, D, H/8, W/8] 91 | out3_a = self.conv3_a(out2_b) 92 | 93 | # in: [B, 8C, D, H/8, W/8], out: [B, 8C, D, H/8, W/8] 94 | out3_b = self.conv3_b(out3_a) + out3_a 95 | 96 | # in: [B, 8C, D, H/8, W/8], out: [B, 4C, D, H/4, W/4] 97 | cost = self.conv3_d(out3_b) + out2_b 98 | 99 | # in: [B, 4C, D, H/4, W/4], out: [B, 2C, D, H/2, W/2] 100 | cost = self.conv2_d(cost) + out1_b 101 | 102 | # in: [B, 2C, D, H/2, W/2], out: [B, C, D, H, W] 103 | cost = self.conv1_d(cost) 104 | 105 | return cost 106 | --------------------------------------------------------------------------------
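# A minimal usage sketch for HWHourglass; the sizes below are arbitrary
# examples (not values used by DeepPruner), and H and W must be divisible
# by 8 so the three stride-(1, 2, 2) stages invert cleanly.
import torch
from dmb.modeling.stereo.cost_processors.utils.hw_hourglass import HWHourglass

hw_hourglass = HWHourglass(in_planes=8, batch_norm=True)
# dummy raw cost volume in [B, C, D, H, W] layout
raw_cost = torch.rand(1, 8, 48, 64, 128)
# aggregation runs at (H/2, W/2), (H/4, W/4), (H/8, W/8) while D is untouched,
# and the module restores the input layout
cost = hw_hourglass(raw_cost)
assert cost.shape == raw_cost.shape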
/dmb/modeling/stereo/disp_predictors/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_disp_predictor 2 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_predictors/builder.py: -------------------------------------------------------------------------------- 1 | from .faster_soft_argmin import FasterSoftArgmin 2 | from .local_soft_argmin import LocalSoftArgmin 3 | from .soft_argmin import SoftArgmin 4 | 5 | PREDICTORS = { 6 | 'DEFAULT': SoftArgmin, 7 | 'FASTER': FasterSoftArgmin, 8 | 'LOCAL': LocalSoftArgmin, 9 | } 10 | 11 | 12 | def build_disp_predictor(cfg): 13 | pred_type = cfg.model.disp_predictor.get('type', 'FASTER') 14 | 15 | assert pred_type in PREDICTORS, 'disparity predictor type not found, expected: {}, ' \ 16 | 'but got {}'.format(PREDICTORS.keys(), pred_type) 17 | 18 | default_args = cfg.model.disp_predictor.copy() 19 | default_args.pop('type') 20 | 21 | disp_predictor = PREDICTORS[pred_type](**default_args) 22 | 23 | return disp_predictor 24 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_predictors/faster_soft_argmin.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class FasterSoftArgmin(nn.Module): 7 | """ 8 | A faster implementation of soft argmin. 9 | For details, refer to dmb.modeling.stereo.disp_predictors.soft_argmin 10 | Args: 11 | max_disp, (int): under the scale of feature used, 12 | often equal to (end disp - start disp + 1), the maximum searching range of disparity 13 | start_disp (int): the start searching disparity index, usually 0 14 | dilation (optional, int): the step between nearby disparity indices 15 | alpha (float or int): a scaling factor multiplied with cost_volume 16 | for details, refer to: https://bouthilx.wordpress.com/2013/04/21/a-soft-argmax/ 17 | normalize (bool): whether apply softmax on cost_volume, default True 18 | 19 | Inputs: 20 | cost_volume (Tensor): the matching cost after regularization, 21 | in [BatchSize, disp_sample_number, Height, Width] layout 22 | disp_sample (optional, Tensor): the estimated disparity samples, 23 | in [BatchSize, disp_sample_number, Height, Width] layout. NOT USED!
24 | Returns: 25 | disp_map (Tensor): a disparity map regressed from cost volume, 26 | in [BatchSize, 1, Height, Width] layout 27 | """ 28 | 29 | def __init__(self, max_disp, start_disp=0, dilation=1, alpha=1.0, normalize=True): 30 | super(FasterSoftArgmin, self).__init__() 31 | self.max_disp = max_disp 32 | self.start_disp = start_disp 33 | self.dilation = dilation 34 | self.end_disp = start_disp + max_disp - 1 35 | self.disp_sample_number = (max_disp + dilation - 1) // dilation 36 | 37 | self.alpha = alpha 38 | self.normalize = normalize 39 | 40 | # compute disparity index: (1, 1, disp_sample_number, 1, 1) 41 | disp_sample = torch.linspace( 42 | self.start_disp, self.end_disp, self.disp_sample_number 43 | ) 44 | disp_sample = disp_sample.repeat(1, 1, 1, 1, 1).permute(0, 1, 4, 2, 3).contiguous() 45 | 46 | self.disp_regression = nn.Conv3d(1, 1, (self.disp_sample_number, 1, 1), 1, 0, bias=False) 47 | 48 | self.disp_regression.weight.data = disp_sample 49 | self.disp_regression.weight.requires_grad = False 50 | 51 | def forward(self, cost_volume, disp_sample=None): 52 | 53 | # note: the cost volume directly represents similarity 54 | # using 'c' or '-c' does not affect performance, because the feature-based cost volume provides flexibility. 55 | 56 | if cost_volume.dim() != 4: 57 | raise ValueError('expected 4D input (got {}D input)' 58 | .format(cost_volume.dim())) 59 | 60 | # scale cost volume with alpha 61 | cost_volume = cost_volume * self.alpha 62 | 63 | if self.normalize: 64 | prob_volume = F.softmax(cost_volume, dim=1) 65 | else: 66 | prob_volume = cost_volume 67 | 68 | # [B, disp_sample_number, H, W] -> [B, 1, disp_sample_number, H, W] 69 | prob_volume = prob_volume.unsqueeze(1) 70 | 71 | disp_map = self.disp_regression(prob_volume) 72 | # [B, 1, 1, H, W] -> [B, 1, H, W] 73 | disp_map = disp_map.squeeze(1) 74 | 75 | return disp_map 76 | 77 | def __repr__(self): 78 | repr_str = '{}\n'.format(self.__class__.__name__) 79 | repr_str += ' ' * 4 + 'Max Disparity: {}\n'.format(self.max_disp) 80 | repr_str += ' ' * 4 + 'Start disparity: {}\n'.format(self.start_disp) 81 | repr_str += ' ' * 4 + 'Dilation rate: {}\n'.format(self.dilation) 82 | repr_str += ' ' * 4 + 'Alpha: {}\n'.format(self.alpha) 83 | repr_str += ' ' * 4 + 'Normalize: {}\n'.format(self.normalize) 84 | 85 | return repr_str 86 | 87 | @property 88 | def name(self): 89 | return 'FasterSoftArgmin' 90 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_predictors/soft_argmin.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class SoftArgmin(nn.Module): 6 | """ 7 | An implementation of soft argmin.
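Concretely, disp_map = sum_d (d * softmax(alpha * cost_volume, dim=1)[d]), i.e. the expectation of disparity under the (optionally softmax-normalized) matching probability distribution.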
8 | Args: 9 | max_disp, (int): under the scale of feature used, 10 | often equal to (end disp - start disp + 1), the maximum searching range of disparity 11 | start_disp (int): the start searching disparity index, usually 0 12 | dilation (optional, int): the step between nearby disparity indices 13 | alpha (float or int): a scaling factor multiplied with cost_volume 14 | for details, refer to: https://bouthilx.wordpress.com/2013/04/21/a-soft-argmax/ 15 | normalize (bool): whether apply softmax on cost_volume, default True 16 | 17 | Inputs: 18 | cost_volume (Tensor): the matching cost after regularization, 19 | in [BatchSize, disp_sample_number, Height, Width] layout 20 | disp_sample (optional, Tensor): the estimated disparity samples, 21 | in [BatchSize, disp_sample_number, Height, Width] layout 22 | 23 | Returns: 24 | disp_map (Tensor): a disparity map regressed from cost volume, 25 | in [BatchSize, 1, Height, Width] layout 26 | """ 27 | 28 | def __init__(self, max_disp=192, start_disp=0, dilation=1, alpha=1.0, normalize=True): 29 | super(SoftArgmin, self).__init__() 30 | self.max_disp = max_disp 31 | self.start_disp = start_disp 32 | self.dilation = dilation 33 | self.end_disp = start_disp + max_disp - 1 34 | self.disp_sample_number = (max_disp + dilation - 1) // dilation 35 | 36 | self.alpha = alpha 37 | self.normalize = normalize 38 | 39 | # generate disparity sample, in [disp_sample_number,] layout 40 | self.disp_sample = torch.linspace( 41 | self.start_disp, self.end_disp, self.disp_sample_number 42 | ) 43 | 44 | def forward(self, cost_volume, disp_sample=None): 45 | 46 | # note: the cost volume directly represents similarity 47 | # using 'c' or '-c' does not affect performance, because the feature-based cost volume provides flexibility. 48 | 49 | if cost_volume.dim() != 4: 50 | raise ValueError('expected 4D input (got {}D input)' 51 | .format(cost_volume.dim())) 52 | 53 | # scale cost volume with alpha 54 | cost_volume = cost_volume * self.alpha 55 | 56 | if self.normalize: 57 | prob_volume = F.softmax(cost_volume, dim=1) 58 | else: 59 | prob_volume = cost_volume 60 | 61 | B, D, H, W = cost_volume.shape 62 | 63 | if disp_sample is None: 64 | assert D == self.disp_sample_number, 'The number of disparity samples should be' \ 65 | ' consistent!' 66 | disp_sample = self.disp_sample.repeat(B, H, W, 1).permute(0, 3, 1, 2).contiguous() 67 | disp_sample = disp_sample.to(cost_volume.device) 68 | 69 | else: 70 | assert D == disp_sample.shape[1], 'The number of disparity samples should be' \ 71 | ' consistent!'
72 | # compute disparity: (BatchSize, 1, Height, Width) 73 | disp_map = torch.sum(prob_volume * disp_sample, dim=1, keepdim=True) 74 | 75 | return disp_map 76 | 77 | def __repr__(self): 78 | repr_str = '{}\n'.format(self.__class__.__name__) 79 | repr_str += ' ' * 4 + 'Max Disparity: {}\n'.format(self.max_disp) 80 | repr_str += ' ' * 4 + 'Start disparity: {}\n'.format(self.start_disp) 81 | repr_str += ' ' * 4 + 'Dilation rate: {}\n'.format(self.dilation) 82 | repr_str += ' ' * 4 + 'Alpha: {}\n'.format(self.alpha) 83 | repr_str += ' ' * 4 + 'Normalize: {}\n'.format(self.normalize) 84 | 85 | return repr_str 86 | 87 | @property 88 | def name(self): 89 | return 'SoftArgmin' 90 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_refinement/AnyNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv_bn_relu 6 | from dmb.ops import GateRecurrent2dnoind 7 | 8 | class AnyNetRefinement(nn.Module): 9 | """ 10 | 11 | The disparity refinement module proposed in AnyNet. 12 | 13 | Args: 14 | in_planes (int): the channels of input 15 | spn_planes (int): the channels used for spn 16 | batch_norm (bool): whether use batch normalization layer, default True 17 | 18 | Inputs: 19 | disps (list of Tensor): estimated disparity maps, in [BatchSize, 1, Height//s, Width//s] layout 20 | left (Tensor): left image feature, in [BatchSize, Channels, Height, Width] layout 21 | right (Tensor): right image feature, in [BatchSize, Channels, Height, Width] layout 22 | leftImage (Tensor): left image, in [BatchSize, 3, Height, Width] layout 23 | rightImage (Tensor): right image, in [BatchSize, 3, Height, Width] layout 24 | 25 | Outputs: 26 | refine_disps (list of Tensor): refined disparity maps, in [BatchSize, 1, Height, Width] layout 27 | 28 | 29 | """ 30 | def __init__(self, in_planes, spn_planes=8, batch_norm=True): 31 | super(AnyNetRefinement, self).__init__() 32 | self.in_planes = in_planes 33 | self.spn_planes = spn_planes 34 | self.batch_norm = batch_norm 35 | 36 | self.img_conv = nn.Sequential( 37 | conv_bn_relu(batch_norm, in_planes, spn_planes * 2, 38 | kernel_size=3, stride=1, padding=1, dilation=1, bias=False), 39 | conv_bn_relu(batch_norm, spn_planes * 2, spn_planes * 2, 40 | kernel_size=3, stride=1, padding=1, dilation=1, bias=False), 41 | conv_bn_relu(batch_norm, spn_planes * 2, spn_planes * 2, 42 | kernel_size=3, stride=1, padding=1, dilation=1, bias=False), 43 | nn.Conv2d(spn_planes * 2, spn_planes * 3, 44 | kernel_size=3, stride=1, padding=1, dilation=1, bias=False), 45 | ) 46 | 47 | self.disp_conv = nn.Conv2d(1, spn_planes, kernel_size=3, 48 | stride=1, padding=1, dilation=1, bias=False) 49 | 50 | self.classify = nn.Conv2d(spn_planes, 1, kernel_size=3, 51 | stride=1, padding=1, dilation=1, bias=False) 52 | 53 | # left->right propagation 54 | self.spn = GateRecurrent2dnoind(True, False) 55 | 56 | 57 | def forward(self, disps, left, right, leftImage, rightImage): 58 | # only the disparity map from the last stage needs to be refined 59 | init_disp = disps[-1] 60 | 61 | # down-sample the left image to the resolution of disparity map 62 | h, w = init_disp.shape[-2:] 63 | leftImage = F.interpolate(leftImage, size=(h, w), mode='bilinear', align_corners=False) 64 | 65 | # extract guidance information from left image 66 | # [B, spn_planes*3, H, W] 67 | G = self.img_conv(leftImage) 68 | 69 | # G1~G3:
three coefficient maps (e.g., left-top, left-center, left-bottom) 70 | # [B, spn_planes, H, W] 71 | G1, G2, G3 = torch.split(G, self.spn_planes, dim=1) 72 | 73 | # for any pixel i, |G1(i)| + |G2(i)| + |G3(i)| <= 1 is a sufficient condition for model stability 74 | # [B, spn_planes, H, W] 75 | sum_abs = G1.abs() + G2.abs() + G3.abs() 76 | G1 = torch.div(G1, sum_abs + 1e-8) 77 | G2 = torch.div(G2, sum_abs + 1e-8) 78 | G3 = torch.div(G3, sum_abs + 1e-8) 79 | 80 | # [B, spn_planes, H, W] 81 | disp_feat = self.disp_conv(init_disp) 82 | 83 | # [B, spn_planes, H, W] 84 | propagated_disp_feat = self.spn(disp_feat, G1, G2, G3) 85 | 86 | # [B, 1, H, W] 87 | res_disp = self.classify(propagated_disp_feat) 88 | 89 | # [B, 1, H, W] 90 | refine_disp = F.relu(res_disp + init_disp) 91 | 92 | disps.append(refine_disp) 93 | # In this framework, we always keep the better disparity map ahead of the worse one. 94 | disps.reverse() 95 | 96 | return disps 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_refinement/DeepPruner.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv_bn, conv_bn_relu 6 | 7 | 8 | class RefinementHead(nn.Module): 9 | """ 10 | Args: 11 | in_planes (int): the channels of input 12 | batch_norm (bool): whether use batch normalization layer, default True 13 | 14 | Inputs: 15 | init_disp (Tensor): estimated disparity map, in [BatchSize, 1, Height, Width] layout 16 | input (Tensor): feature used to guide refinement, in [BatchSize, in_planes, Height, Width] layout 17 | 18 | Outputs: 19 | refine_disp (Tensor): refined disparity map, in [BatchSize, 1, Height, Width] layout 20 | 21 | """ 22 | def __init__(self, in_planes, batch_norm=True): 23 | super(RefinementHead, self).__init__() 24 | self.in_planes = in_planes 25 | self.batch_norm = batch_norm 26 | 27 | self.conv = nn.Sequential( 28 | conv_bn_relu(batch_norm, in_planes, 32, kernel_size=3, stride=1, padding=1, bias=False), 29 | conv_bn_relu(batch_norm, 32, 32, kernel_size=3, stride=1, padding=1, dilation=1, bias=False), 30 | conv_bn_relu(batch_norm, 32, 32, kernel_size=3, stride=1, padding=1, dilation=1, bias=False), 31 | conv_bn_relu(batch_norm, 32, 16, kernel_size=3, stride=1, padding=2, dilation=2, bias=False), 32 | conv_bn_relu(batch_norm, 16, 16, kernel_size=3, stride=1, padding=4, dilation=4, bias=False), 33 | conv_bn_relu(batch_norm, 16, 16, kernel_size=3, stride=1, padding=1, dilation=1, bias=False) 34 | ) 35 | 36 | self.classify = nn.Conv2d(16, 1, kernel_size=3, padding=1, stride=1, bias=False) 37 | 38 | def forward(self, init_disp, input): 39 | 40 | res_disp = self.classify(self.conv(input)) 41 | 42 | refine_disp = F.relu(res_disp + init_disp) 43 | 44 | return refine_disp 45 | 46 | 47 | class DeepPrunerRefinement(nn.Module): 48 | """ 49 | The disparity refinement module proposed in DeepPruner.
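Each refinement stage concatenates the disparity map of the previous stage with a guide feature map, predicts a residual disparity, and bilinearly up-samples the result by 2 (doubling the disparity values to match the new resolution).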
50 | 51 | Args: 52 | in_planes_list (list, tuple): the channels of input of each refinement sub-network 53 | batch_norm (bool): whether use batch normalization layer, default True 54 | num (int): the number of cascaded refinement sub-networks, default 1 55 | 56 | Inputs: 57 | disps (list of Tensor): estimated disparity maps, in [BatchSize, 1, Height, Width] layout 58 | input (Tensor): feature used to guide refinement, in [BatchSize, in_planes, Height, Width] layout 59 | 60 | Outputs: 61 | refine_disps (list of Tensor): refined disparity maps, in [BatchSize, 1, Height, Width] layout 62 | 63 | """ 64 | 65 | def __init__(self, in_planes_list, batch_norm=True, num=1): 66 | super(DeepPrunerRefinement, self).__init__() 67 | self.in_planes_list = in_planes_list 68 | self.batch_norm = batch_norm 69 | self.num = num 70 | 71 | # cascade the refinement sub-networks 72 | self.refine_blocks = nn.ModuleList([ 73 | RefinementHead(self.in_planes_list[i], self.batch_norm) for i in range(self.num) 74 | ]) 75 | 76 | 77 | def forward(self, disps, low_ref_group_fms): 78 | 79 | for i in range(self.num): 80 | # get last stage disparity map 81 | init_disp = disps[-1] 82 | # concatenate last stage disparity map into guide feature map 83 | guide_fms = torch.cat((low_ref_group_fms[i], init_disp), dim=1) 84 | # residual refinement 85 | refine_disp = self.refine_blocks[i](init_disp, guide_fms) 86 | # up-sample the refined disparity map; disparity values are doubled to match the 2x spatial up-sampling 87 | refine_disp = F.interpolate(refine_disp * 2, scale_factor=(2, 2), mode='bilinear', align_corners=False) 88 | 89 | disps.append(refine_disp) 90 | 91 | # In this framework, we always keep the better disparity map ahead of the worse one. 92 | disps.reverse() 93 | 94 | return disps 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_refinement/StereoNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .utils.edge_aware import EdgeAwareRefinement 6 | 7 | class StereoNetRefinement(nn.Module): 8 | """ 9 | The disparity refinement module proposed in StereoNet.
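The coarse disparity map is first bilinearly up-sampled to the full image resolution (with disparity values scaled by the up-sampling factor), then refined by a cascade of edge-aware refinement blocks guided by the left image.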
10 | 11 | Args: 12 | in_planes (int): the channels of input 13 | batch_norm (bool): whether use batch normalization layer, default True 14 | num (int): the number of edge aware refinement modules 15 | 16 | Inputs: 17 | disps (list of Tensor): estimated disparity maps, in [BatchSize, 1, Height//s, Width//s] layout 18 | left (Tensor): left image feature, in [BatchSize, Channels, Height, Width] layout 19 | right (Tensor): right image feature, in [BatchSize, Channels, Height, Width] layout 20 | leftImage (Tensor): left image, in [BatchSize, 3, Height, Width] layout 21 | rightImage (Tensor): right image, in [BatchSize, 3, Height, Width] layout 22 | 23 | Outputs: 24 | refine_disps (list of Tensor): refined disparity maps, in [BatchSize, 1, Height, Width] layout 25 | 26 | """ 27 | 28 | def __init__(self, in_planes, batch_norm=True, num=1): 29 | super(StereoNetRefinement, self).__init__() 30 | self.in_planes = in_planes 31 | self.batch_norm = batch_norm 32 | self.num = num 33 | 34 | # cascade the edge aware refinement modules 35 | self.refine_blocks = nn.ModuleList([ 36 | EdgeAwareRefinement(self.in_planes, self.batch_norm) for _ in range(self.num) 37 | ]) 38 | 39 | def forward(self, disps, left, right, leftImage, rightImage): 40 | # only one estimated disparity map in StereoNet 41 | init_disp = disps[-1] 42 | 43 | # Upsample the coarse disparity map to the full resolution 44 | h, w = leftImage.shape[-2:] 45 | 46 | # the scale of downsample 47 | scale = w / init_disp.shape[-1] 48 | 49 | # upsample disparity map to image size, in [BatchSize, 1, Height, Width] 50 | init_disp = F.interpolate(init_disp, size=(h, w), mode='bilinear', align_corners=False) 51 | init_disp = init_disp * scale 52 | 53 | # cascade and refine the previous disparity map 54 | refine_disps = [init_disp] 55 | for block in self.refine_blocks: 56 | refine_disps.append(block(refine_disps[-1], leftImage)) 57 | 58 | # In this framework, we always keep the better disparity map ahead of the worse one.
59 | refine_disps.reverse() 60 | 61 | return refine_disps 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_refinement/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_disp_refinement -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_refinement/builder.py: -------------------------------------------------------------------------------- 1 | from .StereoNet import StereoNetRefinement 2 | from .DeepPruner import DeepPrunerRefinement 3 | from .AnyNet import AnyNetRefinement 4 | 5 | REFINEMENTS = { 6 | "StereoNet": StereoNetRefinement, 7 | "DeepPruner": DeepPrunerRefinement, 8 | "AnyNet": AnyNetRefinement, 9 | } 10 | 11 | 12 | def build_disp_refinement(cfg): 13 | refine_type = cfg.model.disp_refinement.type 14 | assert refine_type in REFINEMENTS, "disp refinement type not found, expected: {}, " \ 15 | "but got {}".format(REFINEMENTS.keys(), refine_type) 16 | 17 | default_args = cfg.model.disp_refinement.copy() 18 | default_args.pop('type') 19 | default_args.update(batch_norm=cfg.model.batch_norm) 20 | 21 | refinement = REFINEMENTS[refine_type](**default_args) 22 | 23 | return refinement 24 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_refinement/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/dmb/modeling/stereo/disp_refinement/utils/__init__.py -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_refinement/utils/edge_aware.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv_bn, conv_bn_relu, BasicBlock 6 | 7 | 8 | class EdgeAwareRefinement(nn.Module): 9 | """ 10 | The edge aware refinement module proposed in StereoNet.
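The up-sampled disparity map is concatenated with the left image, passed through a stack of dilated residual blocks (dilation rates 1, 2, 4, 8, 1, 1), and the predicted residual disparity is added back to the up-sampled map.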
11 | Args: 12 | in_planes (int): the channels of input 13 | batch_norm (bool): whether use batch normalization layer, default True 14 | 15 | Inputs: 16 | disp (Tensor): estimated disparity map, in [BatchSize, 1, Height//s, Width//s] layout 17 | leftImage (Tensor): left image, in [BatchSize, Channels, Height, Width] layout 18 | 19 | Outputs: 20 | refine_disp (Tensor): refined disparity map, in [BatchSize, 1, Height, Width] layout 21 | """ 22 | 23 | def __init__(self, in_planes, batch_norm=True): 24 | super(EdgeAwareRefinement, self).__init__() 25 | 26 | self.in_planes = in_planes 27 | self.batch_norm = batch_norm 28 | 29 | self.conv_mix = conv_bn_relu(self.batch_norm, self.in_planes, 32, 30 | kernel_size=3, stride=1, padding=1, dilation=1, bias=True) 31 | 32 | # Dilated residual module 33 | self.residual_dilation_blocks = nn.ModuleList() 34 | self.dilation_list = [1, 2, 4, 8, 1, 1] 35 | for dilation in self.dilation_list: 36 | self.residual_dilation_blocks.append( 37 | BasicBlock(self.batch_norm, 32, 32, stride=1, downsample=None, 38 | padding=1, dilation=dilation) 39 | ) 40 | 41 | self.conv_res = nn.Conv2d(32, 1, kernel_size=3, stride=1, padding=1, bias=True) 42 | 43 | def forward(self, disp, leftImage): 44 | h, w = leftImage.shape[-2:] 45 | 46 | # the scale of downsample 47 | scale = w / disp.shape[-1] 48 | 49 | # upsample disparity map to image size, in [BatchSize, 1, Height, Width] 50 | up_disp = F.interpolate(disp, size=(h, w), mode='bilinear', align_corners=False) 51 | up_disp = up_disp * scale 52 | 53 | # residual refinement 54 | # mix the info inside the disparity map and left image 55 | mix_feat = self.conv_mix(torch.cat((up_disp, leftImage), dim=1)) 56 | 57 | for block in self.residual_dilation_blocks: 58 | mix_feat = block(mix_feat) 59 | 60 | # get residual disparity map, in [BatchSize, 1, Height, Width] 61 | res_disp = self.conv_res(mix_feat) 62 | 63 | # refine the upsampled disparity map, in [BatchSize, 1, Height, Width] 64 | refine_disp = res_disp + up_disp 65 | 66 | # ensure all disparity values are non-negative, in [BatchSize, 1, Height, Width] 67 | refine_disp = F.relu(refine_disp, inplace=True) 68 | 69 | return refine_disp 70 | 71 | 72 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_refinement/utils/min_warp_error.py: -------------------------------------------------------------------------------- 1 | """ 2 | Written by youmi. 3 | Implementation of stacked dilation module. 4 | 5 | FrameWork: PyTorch 6 | """ 7 | 8 | from __future__ import print_function 9 | import torch 10 | import torch.nn as nn 11 | import torch.utils.data 12 | import torch.nn.functional as F 13 | 14 | from dmb.modeling.stereo.layers.inverse_warp import inverse_warp 15 | from dmb.modeling.stereo.layers.basic_layers import conv_bn_relu, BasicBlock, conv_bn, deconv_bn_relu 16 | 17 | 18 | class WarpErrorRefinement(nn.Module): 19 | """ 20 | Minimise the warp error to refine the initial disparity map.
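The right feature map is inverse-warped to the left view with the up-sampled disparity map; the warp error |left - warp(right)| is then concatenated with the features and the disparity map to predict a residual correction.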
21 | Args: 22 | in_planes, (int): the channels of left feature 23 | batch_norm, (bool): whether use batch normalization layer 24 | 25 | Inputs: 26 | disp, (Tensor): the left disparity map, in (BatchSize, 1, Height//s, Width//s) layout 27 | left, (Tensor): the left image feature, in (BatchSize, Channels, Height, Width) layout 28 | right, (Tensor): the right image feature, in (BatchSize, Channels, Height, Width) layout 29 | 30 | Outputs: 31 | refine_disp (Tensor): refined disparity map, in [BatchSize, 1, Height, Width] layout 32 | 33 | """ 34 | 35 | def __init__(self, in_planes, C=16, batch_norm=True): 36 | super(WarpErrorRefinement, self).__init__() 37 | self.in_planes = in_planes 38 | self.batch_norm = batch_norm 39 | self.C = C 40 | 41 | self.conv_mix = conv_bn_relu(batch_norm, in_planes*4 + 1, 2*C, kernel_size=3, stride=1, padding=1, dilation=1, bias=False) 42 | 43 | # Dilated residual module 44 | self.residual_dilation_blocks = nn.ModuleList() 45 | self.dilation_list = [1, 2, 4, 8, 1, 1] 46 | for dilation in self.dilation_list: 47 | self.residual_dilation_blocks.append( 48 | conv_bn_relu(batch_norm, 2*C, 2*C, kernel_size=3, stride=1, 49 | padding=dilation, dilation=dilation, bias=False) 50 | ) 51 | 52 | self.conv_res = nn.Conv2d(2*C, 1, kernel_size=3, stride=1, padding=1, bias=True) 53 | 54 | def forward(self, disp, left, right): 55 | B, C, H, W = left.shape 56 | 57 | # the scale of downsample 58 | scale = W / disp.shape[-1] 59 | 60 | # upsample disparity map to image size, in [BatchSize, 1, Height, Width] 61 | up_disp = F.interpolate(disp, size=(H, W), mode='bilinear', align_corners=True) 62 | up_disp = up_disp * scale 63 | 64 | # calculate warp error 65 | warp_right = inverse_warp(right, -up_disp) 66 | error = torch.abs(left - warp_right) 67 | 68 | # residual refinement 69 | # mix the info inside the up-sampled disparity map, left image, right image and warp error 70 | mix_feat = self.conv_mix(torch.cat((left, right, warp_right, error, up_disp), 1)) 71 | 72 | for block in self.residual_dilation_blocks: 73 | mix_feat = block(mix_feat) 74 | 75 | # get residual disparity map, in [BatchSize, 1, Height, Width] 76 | res_disp = self.conv_res(mix_feat) 77 | 78 | # refine the upsampled disparity map, in [BatchSize, 1, Height, Width] 79 | refine_disp = res_disp + up_disp 80 | 81 | # ensure all disparity values are non-negative, in [BatchSize, 1, Height, Width] 82 | refine_disp = F.relu(refine_disp, inplace=True) 83 | 84 | return refine_disp 85 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_disp_sampler -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_samplers/builder.py: -------------------------------------------------------------------------------- 1 | from .DeepPruner import DeepPrunerSampler 2 | 3 | SAMPLER = { 4 | "DeepPruner": DeepPrunerSampler, 5 | } 6 | 7 | 8 | def build_disp_sampler(cfg): 9 | sampler_type = cfg.model.disp_sampler.type 10 | assert sampler_type in SAMPLER, "disp_sampler type not found, expected: {}, " \ 11 | "but got {}".format(SAMPLER.keys(), sampler_type) 12 | 13 | default_args = cfg.model.disp_sampler.copy() 14 | default_args.pop('type') 15 | default_args.update(batch_norm=cfg.model.batch_norm) 16 | 17 | sampler = SAMPLER[sampler_type](**default_args) 18 | 19 | return sampler 20 |
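# A minimal, hypothetical sketch of how build_disp_sampler is driven by an
# mmcv-style config (attribute-style access like cfg.model.disp_sampler.type);
# the values below are placeholders, not DeepPrunerSampler's real arguments:
#
#   from mmcv import Config
#
#   cfg = Config(dict(
#       model=dict(
#           batch_norm=True,
#           disp_sampler=dict(
#               type='DeepPruner',
#               # sampler-specific kwargs go here
#           ),
#       ),
#   ))
#   disp_sampler = build_disp_sampler(cfg)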
-------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_samplers/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/dmb/modeling/stereo/disp_samplers/utils/__init__.py -------------------------------------------------------------------------------- /dmb/modeling/stereo/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/dmb/modeling/stereo/layers/__init__.py -------------------------------------------------------------------------------- /dmb/modeling/stereo/layers/dilated_hourglass.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv3d_bn, conv3d_bn_relu, conv_bn_relu, deconv3d_bn 6 | 7 | 8 | class Hourglass(nn.Module): 9 | def __init__(self, in_planes, batchNorm=True): 10 | super(Hourglass, self).__init__() 11 | self.batchNorm = batchNorm 12 | 13 | self.conv1 = conv3d_bn_relu( 14 | self.batchNorm, in_planes, in_planes * 2, 15 | kernel_size=3, stride=2, padding=1, bias=False 16 | ) 17 | 18 | self.conv2 = conv3d_bn( 19 | self.batchNorm, in_planes * 2, in_planes * 2, 20 | kernel_size=3, stride=1, padding=1, bias=False 21 | ) 22 | 23 | self.conv3 = conv3d_bn_relu( 24 | self.batchNorm, in_planes * 2, in_planes * 2, 25 | kernel_size=3, stride=2, padding=1, bias=False 26 | ) 27 | self.conv4 = conv3d_bn_relu( 28 | self.batchNorm, in_planes * 2, in_planes * 2, 29 | kernel_size=3, stride=1, padding=1, bias=False 30 | ) 31 | self.conv5 = deconv3d_bn( 32 | self.batchNorm, in_planes * 2, in_planes * 2, 33 | kernel_size=3, padding=1, output_padding=1, stride=2, bias=False 34 | ) 35 | self.conv6 = deconv3d_bn( 36 | self.batchNorm, in_planes * 2, in_planes, 37 | kernel_size=3, padding=1, output_padding=1, stride=2, bias=False 38 | ) 39 | 40 | def forward(self, x, presqu, postsqu): 41 | # in:1/4, out:1/8 42 | out = self.conv1(x) 43 | # in:1/8, out:1/8 44 | pre = self.conv2(out) 45 | if postsqu is not None: 46 | pre = F.relu(pre + postsqu, inplace=True) 47 | else: 48 | pre = F.relu(pre, inplace=True) 49 | 50 | # in:1/8, out:1/16 51 | out = self.conv3(pre) 52 | # in:1/16, out:1/16 53 | out = self.conv4(out) 54 | 55 | # in:1/16, out:1/8 56 | if presqu is not None: 57 | post = F.relu(self.conv5(out) + presqu, inplace=True) 58 | else: 59 | post = F.relu(self.conv5(out) + pre, inplace=True) 60 | 61 | # in:1/8, out:1/4 62 | out = self.conv6(post) 63 | 64 | return out, pre, post 65 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/layers/inverse_warp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def inverse_warp(img, disp, padding_mode='zeros'): 5 | """ 6 | Args: 7 | img (Tensor): the source image (where to sample pixels) -- [B, C, H, W] 8 | disp (Tensor): disparity map of the target image -- [B, 1, H, W] 9 | padding_mode (str): padding mode, default is zero padding 10 | Returns: 11 | projected_img (Tensor): source image warped to the target image -- [B, C, H, W] 12 | """ 13 | b, _, h, w = disp.size() 14 | 15 | # [1, H, W] copy 0-height for 
w times : y coord 16 | i_range = torch.arange(0, h).view(1, h, 1).expand(1, h, w).float() 17 | # [1, H, W] copy 0-width for h times : x coord 18 | j_range = torch.arange(0, w).view(1, 1, w).expand(1, h, w).float() 19 | 20 | pixel_coords = torch.stack((j_range, i_range), dim=1).float().to(disp.device) # [1, 2, H, W] 21 | batch_pixel_coords = pixel_coords.expand(b, 2, h, w).contiguous().view(b, 2, -1) # [B, 2, H*W] 22 | 23 | X = batch_pixel_coords[:, 0, :] + disp.contiguous().view(b, -1) # [B, H*W] 24 | Y = batch_pixel_coords[:, 1, :] 25 | 26 | X_norm = 2 * X / (w - 1) - 1 27 | Y_norm = 2 * Y / (h - 1) - 1 28 | 29 | # If grid has values outside the range of [-1, 1], the corresponding outputs are handled as defined by padding_mode. 30 | # For details, please refer to torch.nn.functional.grid_sample 31 | if padding_mode == 'zeros': 32 | X_mask = ((X_norm > 1) | (X_norm < -1)).detach() 33 | X_norm[X_mask] = 2 34 | Y_mask = ((Y_norm > 1) | (Y_norm < -1)).detach() 35 | Y_norm[Y_mask] = 2 36 | 37 | pixel_coords = torch.stack([X_norm, Y_norm], dim=2) # [B, H*W, 2] 38 | pixel_coords = pixel_coords.view(b, h, w, 2) # [B, H, W, 2] 39 | 40 | projected_img = torch.nn.functional.grid_sample(img, pixel_coords, padding_mode=padding_mode, align_corners=True) # align_corners=True matches the (w - 1) / (h - 1) normalization above 41 | 42 | return projected_img 43 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/layers/inverse_warp_3d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | def inverse_warp_3d(img, disp, padding_mode='zeros', disp_Y=None): 5 | """ 6 | Args: 7 | img (Tensor): the source image (where to sample pixels) -- [B, C, H, W] or [B, C, D, H, W] 8 | disp (Tensor): disparity map of the target image -- [B, D, H, W] 9 | padding_mode (str): padding mode, default is zero padding 10 | disp_Y (Tensor): disparity map of the target image along Y-axis -- [B, D, H, W] 11 | Returns: 12 | projected_img (Tensor): source image warped to the target image -- [B, C, D, H, W] 13 | """ 14 | 15 | device = disp.device 16 | B, D, H, W = disp.shape 17 | C = img.shape[1] 18 | 19 | if disp_Y is not None: 20 | assert disp.shape == disp_Y.shape, 'disparity maps along x and y axis should have the same shape!' 21 | if img.dim() == 4: 22 | img = img.unsqueeze(2).expand(B, C, D, H, W) 23 | elif img.dim() == 5: 24 | assert D == img.shape[2], 'The disparity number should be the same between image and disparity map!'
25 | else: 26 | raise ValueError('image is only allowed with 4 or 5 dimensions, ' 27 | 'but got {} dimensions!'.format(img.dim())) 28 | 29 | # get mesh grid for each dimension 30 | grid_d = torch.linspace(0, D - 1, D).view(1, D, 1, 1).expand(B, D, H, W).to(device) 31 | grid_h = torch.linspace(0, H - 1, H).view(1, 1, H, 1).expand(B, D, H, W).to(device) 32 | grid_w = torch.linspace(0, W - 1, W).view(1, 1, 1, W).expand(B, D, H, W).to(device) 33 | 34 | # shift the index of W dimension with disparity 35 | grid_w = grid_w + disp 36 | if disp_Y is not None: 37 | grid_h = grid_h + disp_Y 38 | 39 | # normalize the grid value into [-1, 1]; (0, D-1), (0, H-1), (0, W-1) 40 | grid_d = (grid_d / (D - 1) * 2) - 1 41 | grid_h = (grid_h / (H - 1) * 2) - 1 42 | grid_w = (grid_w / (W - 1) * 2) - 1 43 | 44 | # concatenate the grid_* to [B, D, H, W, 3] 45 | grid_d = grid_d.unsqueeze(4) 46 | grid_h = grid_h.unsqueeze(4) 47 | grid_w = grid_w.unsqueeze(4) 48 | grid = torch.cat((grid_w, grid_h, grid_d), 4) 49 | 50 | projected_img = F.grid_sample(img, grid, padding_mode=padding_mode, align_corners=True) # align_corners=True matches the (D-1)/(H-1)/(W-1) normalization above 51 | 52 | return projected_img 53 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import make_gsm_loss_evaluator 2 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/losses/conf_nll_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class ConfidenceNllLoss(object): 7 | """ 8 | Args: 9 | weights (list of float or None): weight for each scale of estCost. 10 | start_disp (int): the start searching disparity index, usually 0 11 | max_disp (int): the max of Disparity. default: 192 12 | sparse (bool): whether the ground-truth disparity is sparse, 13 | for example, KITTI is sparse, but SceneFlow is not. default is False 14 | Inputs: 15 | estConf (Tensor or list of Tensor): the estimated confidence map, 16 | in [BatchSize, 1, Height, Width] layout. 17 | gtDisp (Tensor): the ground truth disparity map, 18 | in [BatchSize, 1, Height, Width] layout.
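Note: the per-level loss is -logsigmoid(estConf) averaged over the pixels whose ground-truth disparity lies in (start_disp, max_disp / scale); when resolutions differ, gtDisp is first rescaled and pooled to the size of estConf.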
19 | Outputs: 20 | weighted_loss_all_level (dict of Tensors): the weighted loss of all levels 21 | """ 22 | 23 | def __init__(self, max_disp, start_disp=0, weights=None, sparse=False): 24 | self.max_disp = max_disp 25 | self.start_disp = start_disp 26 | self.weights = weights 27 | self.sparse = sparse 28 | if sparse: 29 | # sparse disparity ==> max_pooling 30 | self.scale_func = F.adaptive_max_pool2d 31 | else: 32 | # dense disparity ==> avg_pooling 33 | self.scale_func = F.adaptive_avg_pool2d 34 | 35 | def loss_per_level(self, estConf, gtDisp): 36 | N, C, H, W = estConf.shape 37 | scaled_gtDisp = gtDisp 38 | scale = 1.0 39 | if gtDisp.shape[-2] != H or gtDisp.shape[-1] != W: 40 | # compute scale per level and scale gtDisp 41 | scale = gtDisp.shape[-1] / (W * 1.0) 42 | scaled_gtDisp = gtDisp / scale 43 | scaled_gtDisp = self.scale_func(scaled_gtDisp, (H, W)) 44 | 45 | # mask for valid disparity 46 | # greater than start disparity and less than max disparity / scale 47 | mask = (scaled_gtDisp > self.start_disp) & (scaled_gtDisp < (self.max_disp / scale)) 48 | mask = mask.detach_().type_as(gtDisp) 49 | 50 | # NLL loss 51 | valid_pixel_number = mask.float().sum() 52 | if valid_pixel_number < 1.0: 53 | valid_pixel_number = 1.0 54 | loss = (-1.0 * F.logsigmoid(estConf) * mask).sum() / valid_pixel_number 55 | 56 | return loss 57 | 58 | def __call__(self, estConf, gtDisp): 59 | if not isinstance(estConf, (list, tuple)): 60 | estConf = [estConf] 61 | 62 | if self.weights is None: 63 | self.weights = [1.0] * len(estConf) 64 | 65 | # compute loss for each level 66 | loss_all_level = [ 67 | self.loss_per_level(est_conf_per_lvl, gtDisp) 68 | for est_conf_per_lvl in estConf 69 | ] 70 | 71 | # re-weight loss per level 72 | weighted_loss_all_level = dict() 73 | for i, loss_per_level in enumerate(loss_all_level): 74 | name = "conf_loss_lvl{}".format(i) 75 | weighted_loss_all_level[name] = self.weights[i] * loss_per_level 76 | 77 | return weighted_loss_all_level 78 | 79 | def __repr__(self): 80 | repr_str = '{}\n'.format(self.__class__.__name__) 81 | repr_str += ' ' * 4 + 'Max Disparity: {}\n'.format(self.max_disp) 82 | repr_str += ' ' * 4 + 'Loss weight: {}\n'.format(self.weights) 83 | repr_str += ' ' * 4 + 'Disparity is sparse: {}\n'.format(self.sparse) 84 | 85 | return repr_str 86 | 87 | @property 88 | def name(self): 89 | return 'ConfidenceNLLLoss' 90 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/losses/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class DispSmoothL1Loss(object): 7 | """ 8 | Args: 9 | max_disp (int): the max of Disparity. default is 192 10 | start_disp (int): the start searching disparity index, usually 0 11 | weights (list of float or None): weight for each scale of estCost. 12 | sparse (bool): whether the ground-truth disparity is sparse, 13 | for example, KITTI is sparse, but SceneFlow is not, default is False. 14 | Inputs: 15 | estDisp (Tensor or list of Tensor): the estimated disparity map, 16 | in [BatchSize, 1, Height, Width] layout. 17 | gtDisp (Tensor): the ground truth disparity map, 18 | in [BatchSize, 1, Height, Width] layout.
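Note: when estDisp and gtDisp resolutions differ, gtDisp is divided by the scale factor and pooled (max-pooling for sparse ground truth, average-pooling for dense) to the estimated size; the smooth L1 loss is then computed only over pixels with ground-truth disparity in (start_disp, max_disp / scale).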
19 | Outputs: 20 | loss (dict), the loss of each level 21 | """ 22 | 23 | def __init__(self, max_disp, start_disp=0, weights=None, sparse=False): 24 | self.max_disp = max_disp 25 | self.weights = weights 26 | self.start_disp = start_disp 27 | self.sparse = sparse 28 | if sparse: 29 | # sparse disparity ==> max_pooling 30 | self.scale_func = F.adaptive_max_pool2d 31 | else: 32 | # dense disparity ==> avg_pooling 33 | self.scale_func = F.adaptive_avg_pool2d 34 | 35 | def loss_per_level(self, estDisp, gtDisp): 36 | N, C, H, W = estDisp.shape 37 | scaled_gtDisp = gtDisp 38 | scale = 1.0 39 | if gtDisp.shape[-2] != H or gtDisp.shape[-1] != W: 40 | # compute scale per level and scale gtDisp 41 | scale = gtDisp.shape[-1] / (W * 1.0) 42 | scaled_gtDisp = gtDisp / scale 43 | scaled_gtDisp = self.scale_func(scaled_gtDisp, (H, W)) 44 | 45 | # mask for valid disparity 46 | # (start disparity, max disparity / scale) 47 | # Attention: the invalid disparity of KITTI is set as 0, be sure to mask it out 48 | mask = (scaled_gtDisp > self.start_disp) & (scaled_gtDisp < (self.max_disp / scale)) 49 | if mask.sum() < 1.0: 50 | print('SmoothL1 loss: no point has disparity in ({}, {})!'.format(self.start_disp, 51 | self.max_disp / scale)) 52 | loss = (torch.abs(estDisp - scaled_gtDisp) * mask.float()).mean() 53 | return loss 54 | 55 | # smooth l1 loss 56 | loss = F.smooth_l1_loss(estDisp[mask], scaled_gtDisp[mask], reduction='mean') 57 | 58 | return loss 59 | 60 | def __call__(self, estDisp, gtDisp): 61 | if not isinstance(estDisp, (list, tuple)): 62 | estDisp = [estDisp] 63 | 64 | if self.weights is None: 65 | self.weights = [1.0] * len(estDisp) 66 | 67 | # compute loss for each level 68 | loss_all_level = [] 69 | for est_disp_per_lvl in estDisp: 70 | loss_all_level.append( 71 | self.loss_per_level(est_disp_per_lvl, gtDisp) 72 | ) 73 | 74 | # re-weight loss per level 75 | weighted_loss_all_level = dict() 76 | for i, loss_per_level in enumerate(loss_all_level): 77 | name = "l1_loss_lvl{}".format(i) 78 | weighted_loss_all_level[name] = self.weights[i] * loss_per_level 79 | 80 | return weighted_loss_all_level 81 | 82 | def __repr__(self): 83 | repr_str = '{}\n'.format(self.__class__.__name__) 84 | repr_str += ' ' * 4 + 'Max Disparity: {}\n'.format(self.max_disp) 85 | repr_str += ' ' * 4 + 'Start disparity: {}\n'.format(self.start_disp) 86 | repr_str += ' ' * 4 + 'Loss weight: {}\n'.format(self.weights) 87 | repr_str += ' ' * 4 + 'Disparity is sparse: {}\n'.format(self.sparse) 88 | 89 | return repr_str 90 | 91 | @property 92 | def name(self): 93 | return 'SmoothL1Loss' 94 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/losses/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .disp2prob import ( 2 | LaplaceDisp2Prob, GaussianDisp2Prob, OneHotDisp2Prob 3 | ) 4 | from .ssim import SSIM 5 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/losses/utils/quantile_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | def quantile_loss(minEstDisp, maxEstDisp, gtDisp, max_disp, start_disp=0, weight=1.0, theta=0.05): 6 | """ 7 | An implementation of the quantile loss proposed in DeepPruner 8 | For details, refer to: https://github.com/uber-research/DeepPruner/blob/master/deeppruner/loss_evaluation.py 9 | 10 | Inputs: 11 | minEstDisp,
(Tensor): the estimated min disparity map, i.e. the lower bound of disparity samples, 12 | in [BatchSize, 1, Height, Width] layout. 13 | maxEstDisp, (Tensor): the estimated max disparity map, i.e. the upper bound of disparity samples 14 | in [BatchSize, 1, Height, Width] layout. 15 | gtDisp, (Tensor): the ground truth disparity map, 16 | in [BatchSize, 1, Height, Width] layout. 17 | max_disp (int): the max of Disparity. default is 192 18 | start_disp (int): the start searching disparity index, usually 0 19 | weight (int, float): the weight of quantile loss 20 | theta (float): the balancing scalar, 0 < theta < 0.05 21 | 22 | 23 | """ 24 | # get valid ground truth disparity 25 | mask = (gtDisp > start_disp) & (gtDisp < (start_disp + max_disp)) 26 | 27 | # forces min_disparity to be equal or slightly lower than the ground truth disparity 28 | min_mask = ((gtDisp[mask] - minEstDisp[mask]) < 0).float() 29 | # if x < 0, x * (theta - 1); if x > 0, x * theta (with theta = 0.05: -0.95 and 0.05) 30 | min_loss = (gtDisp[mask] - minEstDisp[mask]) * (theta - min_mask) 31 | min_loss = min_loss.mean() 32 | 33 | # forces max_disparity to be equal or slightly larger than the ground truth disparity 34 | max_mask = ((gtDisp[mask] - maxEstDisp[mask]) < 0).float() 35 | # if x < 0, x * (-theta); if x > 0, x * (1 - theta) (with theta = 0.05: -0.05 and 0.95) 36 | max_loss = (gtDisp[mask] - maxEstDisp[mask]) * ((1 - theta) - max_mask) 37 | max_loss = max_loss.mean() 38 | 39 | total_loss = (min_loss + max_loss) * weight 40 | 41 | return total_loss -------------------------------------------------------------------------------- /dmb/modeling/stereo/losses/utils/ssim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def SSIM(x, y, mask=None, C1=0.01 ** 2, C2=0.03 ** 2): 6 | """ 7 | Calculate the SSIM between two given tensors.
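SSIM(x, y) = ((2 * mu_x * mu_y + C1) * (2 * sigma_xy + C2)) / ((mu_x^2 + mu_y^2 + C1) * (sigma_x^2 + sigma_y^2 + C2)), where the local statistics are computed with 3x3 average pooling.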
8 | For details, please refer to https://en.wikipedia.org/wiki/Structural_similarity 9 | Args: 10 | x (torch.Tensor): in [BatchSize, Channels, Height, Width] layout 11 | y (torch.Tensor): in [BatchSize, Channels, Height, Width] layout 12 | mask (None or torch.Tensor): the mask of valid index, in [BatchSize, Channels, Height, Width] layout 13 | C1 (double or int): a variable to stabilize the division with weak denominator 14 | C2 (double or int): a variable to stabilize the division with weak denominator 15 | Outputs: 16 | (double): the average difference between x and y, with values in [0, 1] 17 | """ 18 | 19 | mu_x = F.avg_pool2d(x, 3, 1, 1) 20 | mu_y = F.avg_pool2d(y, 3, 1, 1) 21 | mu_x_mu_y = mu_x * mu_y 22 | mu_x_sq = mu_x.pow(2) 23 | mu_y_sq = mu_y.pow(2) 24 | 25 | sigma_x = F.avg_pool2d(x ** 2, 3, 1, 1) - mu_x_sq 26 | sigma_y = F.avg_pool2d(y ** 2, 3, 1, 1) - mu_y_sq 27 | sigma_xy = F.avg_pool2d(x * y, 3, 1, 1) - mu_x_mu_y 28 | 29 | SSIM_n = (2 * mu_x_mu_y + C1) * (2 * sigma_xy + C2) 30 | SSIM_d = (mu_x_sq + mu_y_sq + C1) * (sigma_x + sigma_y + C2) 31 | SSIM = SSIM_n / SSIM_d 32 | 33 | if mask is not None: 34 | SSIM = SSIM[mask] 35 | 36 | # Here, we calculate the difference between x and y, and limit its value in [0,1] 37 | return torch.clamp((1 - SSIM) / 2, 0, 1).mean() 38 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .general_stereo_model import GeneralizedStereoModel 2 | from .DeepPruner import DeepPruner 3 | from .AnyNet import AnyNet 4 | 5 | _META_ARCHITECTURES = { 6 | "GeneralizedStereoModel": GeneralizedStereoModel, 7 | "DeepPruner": DeepPruner, 8 | "AnyNet": AnyNet, 9 | } 10 | 11 | 12 | def build_stereo_model(cfg): 13 | meta_arch = _META_ARCHITECTURES[cfg.model.meta_architecture] 14 | return meta_arch(cfg) 15 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/models/general_stereo_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.backbones import build_backbone 6 | from dmb.modeling.stereo.disp_samplers import build_disp_sampler 7 | from dmb.modeling.stereo.cost_processors import build_cost_processor 8 | from dmb.modeling.stereo.cmn import build_cmn 9 | from dmb.modeling.stereo.disp_predictors import build_disp_predictor 10 | from dmb.modeling.stereo.disp_refinement import build_disp_refinement 11 | from dmb.modeling.stereo.losses import make_gsm_loss_evaluator 12 | 13 | 14 | class GeneralizedStereoModel(nn.Module): 15 | """ 16 | A general stereo matching model which fits most methods.
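The forward pass chains a feature backbone, a cost processor (cost volume construction and aggregation), a disparity predictor and, if configured, a confidence measurement network (cmn) and a disparity refinement module; in training mode it returns a loss dict instead of disparity maps.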
17 | 18 | """ 19 | def __init__(self, cfg): 20 | super(GeneralizedStereoModel, self).__init__() 21 | self.cfg = cfg.copy() 22 | self.max_disp = cfg.model.max_disp 23 | 24 | self.backbone = build_backbone(cfg) 25 | 26 | self.cost_processor = build_cost_processor(cfg) 27 | 28 | # confidence measurement network 29 | self.cmn = None 30 | if 'cmn' in cfg.model: 31 | self.cmn = build_cmn(cfg) 32 | 33 | self.disp_predictor = build_disp_predictor(cfg) 34 | 35 | self.disp_refinement = None 36 | if 'disp_refinement' in cfg.model: 37 | self.disp_refinement = build_disp_refinement(cfg) 38 | 39 | # make general stereo matching loss evaluator 40 | self.loss_evaluator = make_gsm_loss_evaluator(cfg) 41 | 42 | def forward(self, batch): 43 | # parse batch 44 | ref_img, tgt_img = batch['leftImage'], batch['rightImage'] 45 | target = batch['leftDisp'] if 'leftDisp' in batch else None 46 | 47 | # extract image feature 48 | ref_fms, tgt_fms = self.backbone(ref_img, tgt_img) 49 | 50 | # compute cost volume 51 | costs = self.cost_processor(ref_fms, tgt_fms) 52 | 53 | # disparity prediction 54 | disps = [self.disp_predictor(cost) for cost in costs] 55 | 56 | # disparity refinement 57 | if self.disp_refinement is not None: 58 | disps = self.disp_refinement(disps, ref_fms, tgt_fms, ref_img, tgt_img) 59 | 60 | if self.training: 61 | loss_dict = dict() 62 | variance = None 63 | if hasattr(self.cfg.model.losses, 'focal_loss'): 64 | variance = self.cfg.model.losses.focal_loss.get('variance', None) 65 | 66 | if self.cmn is not None: 67 | # confidence measurement network 68 | variance, cm_losses = self.cmn(costs, target) 69 | loss_dict.update(cm_losses) 70 | 71 | loss_args = dict( 72 | variance = variance, 73 | ) 74 | 75 | gsm_loss_dict = self.loss_evaluator(disps, costs, target, **loss_args) 76 | loss_dict.update(gsm_loss_dict) 77 | 78 | return {}, loss_dict 79 | 80 | else: 81 | 82 | results = dict( 83 | disps=disps, 84 | costs=costs, 85 | ) 86 | 87 | if self.cmn is not None: 88 | # confidence measurement network 89 | variance, confs = self.cmn(costs, target) 90 | results.update(confs=confs) 91 | 92 | return results, {} 93 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/registry.py: -------------------------------------------------------------------------------- 1 | from dmb.utils import Registry 2 | 3 | BACKBONES = Registry() 4 | -------------------------------------------------------------------------------- /dmb/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .spn import GateRecurrent2dnoind 2 | from detectron2.layers import DeformConv, ModulatedDeformConv 3 | -------------------------------------------------------------------------------- /dmb/ops/spn/__init__.py: -------------------------------------------------------------------------------- 1 | from .modules import GateRecurrent2dnoind -------------------------------------------------------------------------------- /dmb/ops/spn/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/dmb/ops/spn/functions/__init__.py -------------------------------------------------------------------------------- /dmb/ops/spn/functions/gaterecurrent2dnoind.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | try: 
4 | from ..build.lib import gaterecurrent2dnoind_cuda as gaterecurrent2d 5 | except ImportError: 6 | import gaterecurrent2dnoind_cuda as gaterecurrent2d 7 | 8 | class GateRecurrent2dnoindFunction(Function): 9 | 10 | @staticmethod 11 | def forward(ctx, X, G1, G2, G3, horizontal, reverse): 12 | num, channels, height, width = X.size() 13 | output = torch.zeros(num, channels, height, width, device=X.device) 14 | 15 | if not X.is_cuda: 16 | raise NotImplementedError("cpu version is not ready at this time") 17 | 18 | else: 19 | gaterecurrent2d.forward(horizontal, reverse, X, G1, G2, G3, output) 20 | ctx.save_for_backward(X, G1, G2, G3, output) 21 | ctx.hiddensize = X.size() 22 | ctx.horizontal = horizontal 23 | ctx.reverse = reverse 24 | return output 25 | 26 | @staticmethod 27 | def backward(ctx, grad_output): 28 | assert ctx.hiddensize is not None and grad_output.is_cuda 29 | num, channels, height, width = ctx.hiddensize 30 | X, G1, G2, G3, output = ctx.saved_tensors 31 | 32 | grad_X = torch.zeros(num, channels, height, width, device=X.device) 33 | grad_G1 = torch.zeros(num, channels, height, width, device=X.device) 34 | grad_G2 = torch.zeros(num, channels, height, width, device=X.device) 35 | grad_G3 = torch.zeros(num, channels, height, width, device=X.device) 36 | 37 | gaterecurrent2d.backward(ctx.horizontal, ctx.reverse, output, grad_output, X, G1, G2, G3, grad_X, grad_G1, grad_G2, grad_G3) 38 | 39 | return (grad_X, grad_G1, grad_G2, grad_G3) + (None,) * 2 40 | -------------------------------------------------------------------------------- /dmb/ops/spn/gaterecurrent2dnoind_cuda.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: gaterecurrent2dnoind-cuda 3 | Version: 0.0.0 4 | Summary: UNKNOWN 5 | Home-page: UNKNOWN 6 | Author: UNKNOWN 7 | Author-email: UNKNOWN 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 | -------------------------------------------------------------------------------- /dmb/ops/spn/gaterecurrent2dnoind_cuda.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | setup.py 2 | gaterecurrent2dnoind_cuda.egg-info/PKG-INFO 3 | gaterecurrent2dnoind_cuda.egg-info/SOURCES.txt 4 | gaterecurrent2dnoind_cuda.egg-info/dependency_links.txt 5 | gaterecurrent2dnoind_cuda.egg-info/top_level.txt 6 | src/gaterecurrent2dnoind_cuda.cpp 7 | src/gaterecurrent2dnoind_kernel.cu -------------------------------------------------------------------------------- /dmb/ops/spn/gaterecurrent2dnoind_cuda.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dmb/ops/spn/gaterecurrent2dnoind_cuda.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | gaterecurrent2dnoind_cuda 2 | -------------------------------------------------------------------------------- /dmb/ops/spn/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .gaterecurrent2dnoind import GateRecurrent2dnoind 2 | -------------------------------------------------------------------------------- /dmb/ops/spn/modules/gaterecurrent2dnoind.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from ..functions.gaterecurrent2dnoind import GateRecurrent2dnoindFunction 3 | 4 | class 
GateRecurrent2dnoind(nn.Module): 5 | """Gated recurrent 2D propagation (SPN) along one direction, without gate indices.""" 6 | def __init__(self, horizontal_, reverse_): 7 | super(GateRecurrent2dnoind, self).__init__() 8 | self.horizontal = horizontal_ 9 | self.reverse = reverse_ 10 | 11 | def forward(self, X, G1, G2, G3): 12 | return GateRecurrent2dnoindFunction.apply(X, G1, G2, G3, self.horizontal, self.reverse) 13 | -------------------------------------------------------------------------------- /dmb/ops/spn/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='gaterecurrent2dnoind_cuda', 6 | ext_modules=[ 7 | CUDAExtension('gaterecurrent2dnoind_cuda', [ 8 | 'src/gaterecurrent2dnoind_cuda.cpp', 9 | 'src/gaterecurrent2dnoind_kernel.cu', 10 | ]) 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) 15 | 16 | 17 | -------------------------------------------------------------------------------- /dmb/ops/spn/src/gaterecurrent2dnoind_cuda.cpp: -------------------------------------------------------------------------------- 1 | // gaterecurrent2dnoind_cuda.cpp 2 | //#include <THC/THC.h> 3 | #include <torch/extension.h> 4 | #include <vector> 5 | #include "gaterecurrent2dnoind_kernel.h" 6 | 7 | int gaterecurrent2dnoind_forward_cuda(int horizontal_, int reverse_, torch::Tensor X, torch::Tensor G1, torch::Tensor G2, torch::Tensor G3, torch::Tensor output) 8 | { 9 | // Grab flat float pointers into the input tensors 10 | float * X_data = X.data_ptr<float>(); 11 | float * G1_data = G1.data_ptr<float>(); 12 | float * G2_data = G2.data_ptr<float>(); 13 | float * G3_data = G3.data_ptr<float>(); 14 | float * H_data = output.data_ptr<float>(); 15 | 16 | // dimensions 17 | int num_ = X.size(0); 18 | int channels_ = X.size(1); 19 | int height_ = X.size(2); 20 | int width_ = X.size(3); 21 | 22 | 23 | if(horizontal_ && !reverse_) // left to right 24 | { 25 | //const int count = height_ * channels_ * num_; 26 | Forward_left_right(num_, channels_, height_, width_, X_data, G1_data, G2_data, G3_data, H_data, horizontal_, reverse_); 27 | } 28 | else if(horizontal_ && reverse_) // right to left 29 | { 30 | Forward_right_left(num_, channels_, height_, width_, X_data, G1_data, G2_data, G3_data, H_data, horizontal_, reverse_); 31 | } 32 | else if(!horizontal_ && !reverse_) // top to bottom 33 | { 34 | Forward_top_bottom(num_, channels_, height_, width_, X_data, G1_data, G2_data, G3_data, H_data, horizontal_, reverse_); 35 | } 36 | else 37 | { 38 | Forward_bottom_top(num_, channels_, height_, width_, X_data, G1_data, G2_data, G3_data, H_data, horizontal_, reverse_); 39 | } 40 | 41 | return 1; 42 | } 43 | 44 | int gaterecurrent2dnoind_backward_cuda(int horizontal_, int reverse_, torch::Tensor top, torch::Tensor top_grad, torch::Tensor X, torch::Tensor G1, torch::Tensor G2, torch::Tensor G3, torch::Tensor X_grad, torch::Tensor G1_grad, torch::Tensor G2_grad, torch::Tensor G3_grad) 45 | { 46 | // Grab flat float pointers into the input tensors 47 | float * X_data = X.data_ptr<float>(); 48 | float * G1_data = G1.data_ptr<float>(); 49 | float * G2_data = G2.data_ptr<float>(); 50 | float * G3_data = G3.data_ptr<float>(); 51 | float * H_data = top.data_ptr<float>(); 52 | 53 | float * H_diff = top_grad.data_ptr<float>(); 54 | 55 | float * X_diff = X_grad.data_ptr<float>(); 56 | float * G1_diff = G1_grad.data_ptr<float>(); 57 | float * G2_diff = G2_grad.data_ptr<float>(); 58 | float * G3_diff = G3_grad.data_ptr<float>(); 59 | 60 | // dimensions 61 | int num_ = X.size(0); 62 | int channels_ = X.size(1); 63 | int height_ = X.size(2); 64 | int width_ = X.size(3); 65 | 66 | 67 | if(horizontal_ && !
reverse_) // left to right 68 | { 69 | Backward_left_right(num_, channels_, height_, width_, X_data, G1_data, G2_data, G3_data, H_data, X_diff, G1_diff, G2_diff, G3_diff, H_diff, horizontal_, reverse_); 70 | } 71 | else if(horizontal_ && reverse_) // right to left 72 | { 73 | Backward_right_left(num_, channels_, height_, width_, X_data, G1_data, G2_data, G3_data, H_data, X_diff, G1_diff, G2_diff, G3_diff, H_diff, horizontal_, reverse_); 74 | } 75 | else if(!horizontal_ && !reverse_) // top to bottom 76 | { 77 | Backward_top_bottom(num_, channels_, height_, width_, X_data, G1_data, G2_data, G3_data, H_data, X_diff, G1_diff, G2_diff, G3_diff, H_diff, horizontal_, reverse_); 78 | } 79 | else { 80 | Backward_bottom_top(num_, channels_, height_, width_, X_data, G1_data, G2_data, G3_data, H_data, X_diff, G1_diff, G2_diff, G3_diff, H_diff, horizontal_, reverse_); 81 | } 82 | 83 | return 1; 84 | } 85 | 86 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 87 | m.def("forward", &gaterecurrent2dnoind_forward_cuda, "GateRecurrent2dnoind forward (CUDA)"); 88 | m.def("backward", &gaterecurrent2dnoind_backward_cuda, "GateRecurrent2dnoind backward (CUDA)"); 89 | } -------------------------------------------------------------------------------- /dmb/ops/spn/src/gaterecurrent2dnoind_cuda.h: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | int gaterecurrent2dnoind_forward_cuda(int horizontal_, int reverse_, torch::Tensor X, torch::Tensor G1, torch::Tensor G2, torch::Tensor G3, torch::Tensor output); 4 | int gaterecurrent2dnoind_backward_cuda(int horizontal_, int reverse_, torch::Tensor top, torch::Tensor top_grad, torch::Tensor X, torch::Tensor G1, torch::Tensor G2, torch::Tensor G3, torch::Tensor X_diff, torch::Tensor G1_diff, torch::Tensor G2_diff, torch::Tensor G3_diff); 5 | -------------------------------------------------------------------------------- /dmb/ops/spn/src/gaterecurrent2dnoind_kernel.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | void Forward_left_right(int num_, int channels_, int height_, int width_, float * X, float * G1, float * G2, float * G3, float * H, int horizontal_, int reverse_); 4 | void Forward_right_left(int num_, int channels_, int height_, int width_, float * X, float * G1, float * G2, float * G3, float * H, int horizontal_, int reverse_); 5 | void Forward_top_bottom(int num_, int channels_, int height_, int width_, float * X, float * G1, float * G2, float * G3, float * H, int horizontal_, int reverse_); 6 | void Forward_bottom_top(int num_, int channels_, int height_, int width_, float * X, float * G1, float * G2, float * G3, float * H, int horizontal_, int reverse_); 7 | void Backward_left_right(int num_, int channels_, int height_, int width_, float * X, float * G1, float * G2, float * G3, float * H, float * X_diff, float * G1_diff, float * G2_diff, float * G3_diff, float * H_diff, int horizontal_, int reverse_); 8 | void Backward_right_left(int num_, int channels_, int height_, int width_, float * X, float * G1, float * G2, float * G3, float * H, float * X_diff, float * G1_diff, float * G2_diff, float * G3_diff, float * H_diff, int horizontal_, int reverse_); 9 | void Backward_top_bottom(int num_, int channels_, int height_, int width_, float * X, float * G1, float * G2, float * G3, float * H, float * X_diff, float * G1_diff, float * G2_diff, float * G3_diff, float * H_diff, int horizontal_, int reverse_); 10 | void Backward_bottom_top(int num_, int channels_, int height_, int width_, float * X, float * 
G1, float * G2, float * G3, float * H, float * X_diff, float * G1_diff, float * G2_diff, float * G3_diff, float * H_diff, int horizontal_, int reverse_); 11 | 12 | 13 | -------------------------------------------------------------------------------- /dmb/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist_utils import all_reduce_grads, DistOptimizerHook, DistApexOptimizerHook 2 | from .collect_env import collect_env_info 3 | from .env import init_dist, set_random_seed, get_root_logger 4 | from .tensorboard_logger import TensorboardLoggerHook 5 | from .text_logger import TextLoggerHook 6 | from .registry import Registry 7 | -------------------------------------------------------------------------------- /dmb/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | import PIL 2 | 3 | from torch.utils.collect_env import get_pretty_env_info 4 | 5 | 6 | def get_pil_version(): 7 | return "\n Pillow ({})".format(PIL.__version__) 8 | 9 | 10 | def collect_env_info(): 11 | env_str = get_pretty_env_info() 12 | env_str += get_pil_version() 13 | return env_str 14 | -------------------------------------------------------------------------------- /dmb/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.distributed as dist 4 | from torch._utils import ( 5 | _flatten_dense_tensors, _unflatten_dense_tensors, _take_tensors 6 | ) 7 | from mmcv.runner import OptimizerHook 8 | 9 | try: 10 | from apex import amp 11 | import apex 12 | except ImportError: 13 | raise ImportError('Use APEX for multi-precision via apex.amp') 14 | 15 | 16 | def _all_reduce_coalesced(tensors, world_size, bucket_size_mb=-1): 17 | if bucket_size_mb > 0: 18 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 19 | buckets = _take_tensors(tensors, bucket_size_bytes) 20 | else: 21 | buckets = OrderedDict() 22 | for tensor in tensors: 23 | tp = tensor.type() 24 | if tp not in buckets: 25 | buckets[tp] = [] 26 | buckets[tp].append(tensor) 27 | buckets = buckets.values() 28 | 29 | for bucket in buckets: 30 | flat_tensors = _flatten_dense_tensors(bucket) 31 | dist.all_reduce(flat_tensors) 32 | flat_tensors.div_(world_size) 33 | for tensor, synced in zip( 34 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 35 | tensor.copy_(synced) 36 | 37 | 38 | def all_reduce_grads(model, coalesce=True, bucket_size_mb=-1): 39 | grads = [ 40 | param.grad.data for param in model.parameters() 41 | if param.requires_grad and param.grad is not None 42 | ] 43 | 44 | world_size = dist.get_world_size() 45 | if coalesce: 46 | _all_reduce_coalesced(grads, world_size, bucket_size_mb) 47 | else: 48 | for tensor in grads: 49 | dist.all_reduce(tensor.div_(world_size)) 50 | 51 | 52 | class DistOptimizerHook(OptimizerHook): 53 | 54 | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1): 55 | super(DistOptimizerHook, self).__init__(grad_clip) 56 | self.grad_clip = grad_clip 57 | self.coalesce = coalesce 58 | self.bucket_size_mb = bucket_size_mb 59 | 60 | def after_train_iter(self, runner): 61 | runner.optimizer.zero_grad() 62 | runner.outputs['loss'].backward() 63 | all_reduce_grads(runner.model, self.coalesce, self.bucket_size_mb) 64 | if self.grad_clip is not None: 65 | self.clip_grads(runner.model.parameters()) 66 | runner.optimizer.step() 67 | 68 | 69 | class DistApexOptimizerHook(OptimizerHook): 70 | 71 | def __init__(self, 
grad_clip=None, coalesce=True, bucket_size_mb=-1, use_apex=True): 72 | super(DistApexOptimizerHook, self).__init__(grad_clip) 73 | self.grad_clip = grad_clip 74 | self.coalesce = coalesce 75 | self.bucket_size_mb = bucket_size_mb 76 | self.use_apex = use_apex 77 | 78 | def after_train_iter(self, runner): 79 | runner.model.zero_grad() 80 | runner.optimizer.zero_grad() 81 | # Note: If mixed precision is not used, this ends up doing nothing 82 | # Otherwise apply loss scaling for mixed-precision recipe 83 | with amp.scale_loss(runner.outputs['loss'], runner.optimizer) as scaled_losses: 84 | scaled_losses.backward() 85 | all_reduce_grads(runner.model, self.coalesce, self.bucket_size_mb) 86 | if self.grad_clip is not None: 87 | self.clip_grads(runner.model.parameters()) 88 | runner.optimizer.step() 89 | -------------------------------------------------------------------------------- /dmb/utils/env.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | import random 5 | import subprocess 6 | 7 | import numpy as np 8 | 9 | import torch 10 | import torch.distributed as dist 11 | import torch.multiprocessing as mp 12 | 13 | from mmcv.runner import get_dist_info 14 | 15 | 16 | def _init_dist_pytorch(backend, **kwargs): 17 | # TODO: use local_rank instead of rank % num_gpus 18 | rank = int(os.environ['RANK']) 19 | num_gpus = torch.cuda.device_count() 20 | torch.cuda.set_device(rank % num_gpus) 21 | dist.init_process_group(backend=backend, **kwargs) 22 | 23 | 24 | def _init_dist_mpi(backend, **kwargs): 25 | raise NotImplementedError 26 | 27 | 28 | def _init_dist_slurm(backend, port=29500, **kwargs): 29 | proc_id = int(os.environ['SLURM_PROCID']) 30 | ntasks = int(os.environ['SLURM_NTASKS']) 31 | node_list = os.environ['SLURM_NODELIST'] 32 | num_gpus = torch.cuda.device_count() 33 | torch.cuda.set_device(proc_id % num_gpus) 34 | addr = subprocess.getoutput( 35 | 'scontrol show hostname {} | head -n1'.format(node_list)) 36 | os.environ['MASTER_PORT'] = str(port) 37 | os.environ['MASTER_ADDR'] = addr 38 | os.environ['WORLD_SIZE'] = str(ntasks) 39 | os.environ['RANK'] = str(proc_id) 40 | dist.init_process_group(backend=backend) 41 | 42 | 43 | def init_dist(launcher, backend='nccl', **kwargs): 44 | if mp.get_start_method(allow_none=True) is None: 45 | mp.set_start_method('spawn') 46 | if launcher == 'pytorch': 47 | _init_dist_pytorch(backend, **kwargs) 48 | elif launcher == 'mpi': 49 | _init_dist_mpi(backend, **kwargs) 50 | elif launcher == 'slurm': 51 | _init_dist_slurm(backend, **kwargs) 52 | else: 53 | raise ValueError('Invalid launcher type: {}'.format(launcher)) 54 | 55 | 56 | def set_random_seed(seed): 57 | random.seed(seed) 58 | np.random.seed(seed) 59 | torch.manual_seed(seed) 60 | torch.cuda.manual_seed_all(seed) 61 | 62 | 63 | def get_root_logger(save_dir, log_level=logging.INFO, filename="log.txt"): 64 | logger = logging.getLogger() 65 | if not logger.hasHandlers(): 66 | logging.basicConfig( 67 | format='%(asctime)s - %(levelname)s - %(message)s', 68 | level=log_level) 69 | rank, _ = get_dist_info() 70 | if rank != 0: 71 | logger.setLevel('ERROR') 72 | if save_dir: 73 | fh = logging.FileHandler(os.path.join(save_dir, filename)) 74 | fh.setLevel(log_level) 75 | formatter = logging.Formatter("%(asctime)s - %(levelname)s: %(message)s") 76 | fh.setFormatter(formatter) 77 | logger.addHandler(fh) 78 | if rank != 0: 79 | fh.setLevel('ERROR') 80 | 81 | return logger 82 | 
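A minimal usage sketch for the environment helpers above, assuming a training entry point similar to tools/train.py; the launcher, seed, and work_dir below are illustrative, not taken from the repository:

import logging

from dmb.utils import init_dist, set_random_seed, get_root_logger

def main():
    # seed python, numpy and torch (including all CUDA devices) up front
    set_random_seed(1)
    # join the process group, e.g. when launched via torch.distributed.launch
    # (expects RANK etc. to be set in the environment)
    init_dist('pytorch', backend='nccl')
    # rank 0 writes <save_dir>/log.txt at INFO level; other ranks only log errors
    logger = get_root_logger('./work_dir', log_level=logging.INFO)
    logger.info('distributed environment initialised')

if __name__ == '__main__':
    main()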
-------------------------------------------------------------------------------- /dmb/utils/fp16/__init__.py: -------------------------------------------------------------------------------- 1 | from .decorators import auto_fp16, force_fp32 2 | from .hooks import Fp16OptimizerHook, wrap_fp16_model 3 | 4 | __all__ = ['auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model'] 5 | -------------------------------------------------------------------------------- /dmb/utils/fp16/utils.py: -------------------------------------------------------------------------------- 1 | from collections import abc 2 | 3 | import numpy as np 4 | import torch 5 | 6 | 7 | def cast_tensor_type(inputs, src_type, dst_type): 8 | if isinstance(inputs, torch.Tensor): 9 | return inputs.to(dst_type) 10 | elif isinstance(inputs, str): 11 | return inputs 12 | elif isinstance(inputs, np.ndarray): 13 | return inputs 14 | elif isinstance(inputs, abc.Mapping): 15 | return type(inputs)({ 16 | k: cast_tensor_type(v, src_type, dst_type) 17 | for k, v in inputs.items() 18 | }) 19 | elif isinstance(inputs, abc.Iterable): 20 | return type(inputs)( 21 | cast_tensor_type(item, src_type, dst_type) for item in inputs) 22 | else: 23 | return inputs 24 | -------------------------------------------------------------------------------- /dmb/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copy from maskrcnn_benchmark 2 | # https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/utils/registry.py 3 | 4 | 5 | def _register_generic(module_dict, module_name, module): 6 | assert module_name not in module_dict 7 | module_dict[module_name] = module 8 | 9 | 10 | class Registry(dict): 11 | ''' 12 | A helper class for managing module registration. It extends a dictionary 13 | and provides a register function. 14 | E.g. creating a registry: 15 | some_registry = Registry({"default": default_module}) 16 | There are two ways of registering new modules: 17 | 1): call the register function directly: 18 | def foo(): 19 | ... 20 | some_registry.register("foo_module", foo) 21 | 2): use register as a decorator when declaring the module: 22 | @some_registry.register("foo_module") 23 | @some_registry.register("foo_module_nickname") 24 | def foo(): 25 | ... 26 | Modules are then accessed as in a dictionary, e.g.: 27 | f = some_registry["foo_module"] 28 | ''' 29 | 30 | def __init__(self, *args, **kwargs): 31 | super(Registry, self).__init__(*args, **kwargs) 32 | 33 | def register(self, module_name, module=None): 34 | # used as function call 35 | if module is not None: 36 | _register_generic(self, module_name, module) 37 | return 38 | 39 | # used as decorator 40 | def register_fn(fn): 41 | _register_generic(self, module_name, fn) 42 | return fn 43 | 44 | return register_fn 45 | -------------------------------------------------------------------------------- /dmb/utils/solver.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import torch 4 | from mmcv.runner import obj_from_dict 5 | 6 | def build_optimizer(model, optimizer_cfg): 7 | """Build optimizer from configs. 8 | Args: 9 | model (:obj:`nn.Module`): The model with parameters to be optimized. 10 | optimizer_cfg (dict): The config dict of the optimizer. 11 | Positional fields are: 12 | - type: class name of the optimizer. 13 | - lr: base learning rate. 14 | Optional fields are: 15 | - any arguments of the corresponding optimizer type, e.g., 16 | weight_decay, momentum, etc. 
17 | - paramwise_options: a dict with 3 accepted fields 18 | (bias_lr_mult, bias_decay_mult, norm_decay_mult). 19 | `bias_lr_mult` and `bias_decay_mult` will be multiplied to 20 | the lr and weight decay respectively for all bias parameters 21 | (except for the normalization layers), and 22 | `norm_decay_mult` will be multiplied to the weight decay 23 | for all weight and bias parameters of normalization layers. 24 | Returns: 25 | torch.optim.Optimizer: The initialized optimizer. 26 | """ 27 | if hasattr(model, 'module'): 28 | model = model.module 29 | 30 | optimizer_cfg = optimizer_cfg.copy() 31 | paramwise_options = optimizer_cfg.pop('paramwise_options', None) 32 | # if no paramwise option is specified, just use the global setting 33 | if paramwise_options is None: 34 | return obj_from_dict(optimizer_cfg, torch.optim, dict(params=model.parameters())) 35 | else: 36 | assert isinstance(paramwise_options, dict) 37 | # get base lr and weight decay 38 | base_lr = optimizer_cfg['lr'] 39 | base_wd = optimizer_cfg.get('weight_decay', None) 40 | # weight_decay must be explicitly specified if mult is specified 41 | if ('bias_decay_mult' in paramwise_options or 'norm_decay_mult' in paramwise_options): 42 | assert base_wd is not None 43 | 44 | # get param-wise options 45 | bias_lr_mult = paramwise_options.get('bias_lr_mult', 1.) 46 | bias_decay_mult = paramwise_options.get('bias_decay_mult', 1.) 47 | norm_decay_mult = paramwise_options.get('norm_decay_mult', 1.) 48 | # set param-wise lr and weight decay 49 | params = [] 50 | for name, param in model.named_parameters(): 51 | if not param.requires_grad: 52 | continue 53 | 54 | param_group = {'params': [param]} 55 | # for norm layers, overwrite the weight decay of weight and bias 56 | # TODO: obtain the norm layer prefixes dynamically 57 | if re.search(r'(bn|gn)(\d+)?\.(weight|bias)', name): 58 | if base_wd is not None: 59 | param_group['weight_decay'] = base_wd * norm_decay_mult 60 | # for other layers, overwrite both lr and weight decay of bias 61 | elif name.endswith('.bias'): 62 | param_group['lr'] = base_lr * bias_lr_mult 63 | if base_wd is not None: 64 | param_group['weight_decay'] = base_wd * bias_decay_mult 65 | # otherwise use the global settings 66 | 67 | params.append(param_group) 68 | 69 | optimizer_cls = getattr(torch.optim, optimizer_cfg.pop('type')) 70 | return optimizer_cls(params, **optimizer_cfg) 71 | -------------------------------------------------------------------------------- /dmb/utils/tensorboard_logger.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import numpy as np 3 | 4 | from mmcv.runner import LoggerHook, master_only 5 | 6 | 7 | class TensorboardLoggerHook(LoggerHook): 8 | """ 9 | Hook that logs training records to TensorBoard. 10 | 11 | Args: 12 | log_dir (str or Path): directory to save the TensorBoard event files. 
13 | interval (int): logging interval in iterations, default is 10. 14 | ignore_last (bool): whether to ignore the log records of the last 15 | iterations in an epoch if fewer than `interval` remain. 16 | reset_flag (bool): whether to clear the log buffer after logging. 17 | register_logWithIter_keyword (list or tuple): variables whose names 18 | contain one of these keywords are logged with the iteration, 19 | rather than the epoch, as global step. 20 | """ 21 | 22 | def __init__( 23 | self, 24 | log_dir=None, 25 | interval=10, 26 | ignore_last=True, 27 | reset_flag=True, 28 | register_logWithIter_keyword=None 29 | ): 30 | super(TensorboardLoggerHook, self).__init__(interval, ignore_last, 31 | reset_flag) 32 | self.log_dir = log_dir 33 | self.register_logWithIter_keyword = register_logWithIter_keyword 34 | 35 | @master_only 36 | def before_run(self, runner): 37 | try: 38 | from tensorboardX import SummaryWriter 39 | except ImportError: 40 | raise ImportError('Please install tensorflow and tensorboardX ' 41 | 'to use TensorboardLoggerHook.') 42 | else: 43 | if self.log_dir is None: 44 | self.log_dir = osp.join(runner.work_dir, 'tf_logs') 45 | self.writer = SummaryWriter(self.log_dir) 46 | 47 | @master_only 48 | def single_log(self, tag, record, global_step): 49 | # self-defined, in format: prefix/suffix_tag 50 | prefix = tag.split('/')[0] 51 | suffix_tag = '/'.join(tag.split('/')[1:]) 52 | if prefix == 'image': 53 | self.writer.add_image(suffix_tag, record, global_step) 54 | return 55 | if prefix == 'figure': 56 | self.writer.add_figure(suffix_tag, record, global_step) 57 | return 58 | if prefix == 'histogram': 59 | self.writer.add_histogram(suffix_tag, record, global_step) 60 | return 61 | if prefix == 'scalar': 62 | self.writer.add_scalar(suffix_tag, record, global_step) 63 | return 64 | 65 | if isinstance(record, str): 66 | self.writer.add_text(tag, record, global_step) 67 | return 68 | 69 | if record.size > 1: 70 | self.writer.add_image(tag, record, global_step) 71 | else: 72 | self.writer.add_scalar(tag, record, global_step) 73 | 74 | @master_only 75 | def log(self, runner): 76 | for var in runner.log_buffer.output: 77 | if var in ['time', 'data_time']: 78 | continue 79 | tag = var 80 | record = runner.log_buffer.output[var] 81 | 82 | global_step = runner.epoch 83 | 84 | # e.g., variables matching a keyword (such as loss) are logged per iteration 85 | if isinstance(self.register_logWithIter_keyword, (tuple, list)): 86 | for keyword in self.register_logWithIter_keyword: 87 | if var.find(keyword) > -1: 88 | global_step = runner.iter 89 | 90 | global_step = global_step + 1 91 | 92 | if isinstance(record, (list, tuple)): 93 | for idx, rec in enumerate(record): 94 | tag = var + '/' + '{}'.format(idx) 95 | self.single_log(tag, rec, global_step) 96 | else: 97 | self.single_log(tag, record, global_step) 98 | 99 | @master_only 100 | def after_run(self, runner): 101 | self.writer.close() 102 | -------------------------------------------------------------------------------- /dmb/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | from .flow import SaveResultTool as FlowSaveResultTool 2 | from .stereo import SaveResultTool as DispSaveResultTool 3 | 4 | def SaveResultTool(task): 5 | if task == 'stereo': 6 | return DispSaveResultTool() 7 | elif task == 'flow': 8 | return FlowSaveResultTool() 9 | else: 10 | raise NotImplementedError 11 | 12 | 13 | from .flow import ShowResultTool as FlowShowResultTool 14 | from .stereo import ShowResultTool as DispShowResultTool 15 | 16 | def ShowResultTool(task): 17 | if task == 'stereo': 18 | return DispShowResultTool() 19 | elif task == 'flow': 20 | return FlowShowResultTool() 21 | else: 22 | raise NotImplementedError 23 | -------------------------------------------------------------------------------- /dmb/visualization/flow/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .vis import flow_to_color, flow_err_to_color, flow_max_rad, tensor_to_color, chw_to_hwc, group_color 2 | from .show_result import ShowResultTool, ShowFlow 3 | from .save_result import SaveResultTool 4 | from .vis_hooks import DistFlowVisHook -------------------------------------------------------------------------------- /dmb/visualization/flow/save_result.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import skimage 3 | import skimage.io 4 | import skimage.transform 5 | import os 6 | import os.path as osp 7 | 8 | from mmcv import mkdir_or_exist 9 | from dmb.data.datasets.utils.load_flow import write_flo 10 | from dmb.visualization.flow.show_result import ShowResultTool 11 | 12 | class SaveResultTool(object): 13 | def __call__(self, result, out_dir, image_name): 14 | result_tool = ShowResultTool() 15 | result = result_tool(result) 16 | if 'GrayDisparity' in result.keys(): 17 | grayEstDisp = result['GrayDisparity'] 18 | gray_save_path = osp.join(out_dir, 'flow_0') 19 | mkdir_or_exist(gray_save_path) 20 | skimage.io.imsave(osp.join(gray_save_path, image_name), (grayEstDisp * 256).astype('uint16')) 21 | 22 | if 'ColorDisparity' in result.keys(): 23 | colorEstDisp = result['ColorDisparity'] 24 | color_save_path = osp.join(out_dir, 'color_disp') 25 | mkdir_or_exist(color_save_path) 26 | plt.imsave(osp.join(color_save_path, image_name), colorEstDisp, cmap=plt.cm.hot) 27 | 28 | if 'GroupColor' in result.keys(): 29 | group_save_path = os.path.join(out_dir, 'group_flow') 30 | mkdir_or_exist(group_save_path) 31 | plt.imsave(osp.join(group_save_path, image_name), result['GroupColor'], cmap=plt.cm.hot) 32 | 33 | -------------------------------------------------------------------------------- /dmb/visualization/stereo/__init__.py: -------------------------------------------------------------------------------- 1 | from .vis import disp_to_color, tensor_to_color, disp_err_to_color, group_color 2 | from .show_result import ShowDisp, ShowConf, ShowResultTool 3 | from .save_result import SaveResultTool 4 | from .vis_hooks import DistStereoVisHook 5 | from .sparsification_plot import sparsification_plot 6 | -------------------------------------------------------------------------------- /dmb/visualization/stereo/save_result.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import skimage 3 | import skimage.io 4 | import skimage.transform 5 | import os 6 | import os.path as osp 7 | 8 | from mmcv import mkdir_or_exist 9 | from dmb.visualization.stereo.show_result import ShowResultTool 10 | 11 | 12 | class SaveResultTool(object): 13 | def __call__(self, result, out_dir, image_name): 14 | result_tool = ShowResultTool() 15 | result = result_tool(result, color_map='gray', bins=100) 16 | 17 | if 'GrayDisparity' in result.keys(): 18 | grayEstDisp = result['GrayDisparity'] 19 | gray_save_path = osp.join(out_dir, 'disp_0') 20 | mkdir_or_exist(gray_save_path) 21 | skimage.io.imsave(osp.join(gray_save_path, image_name), (grayEstDisp * 256).astype('uint16')) 22 | 23 | if 'ColorDisparity' in result.keys(): 24 | colorEstDisp = result['ColorDisparity'] 25 | color_save_path = osp.join(out_dir, 'color_disp') 26 | mkdir_or_exist(color_save_path) 27 | plt.imsave(osp.join(color_save_path, image_name), colorEstDisp, cmap=plt.cm.hot) 28 | 29 | if 'GroupColor' in result.keys(): 30 | 
group_save_path = os.path.join(out_dir, 'group_disp') 31 | mkdir_or_exist(group_save_path) 32 | plt.imsave(osp.join(group_save_path, image_name), result['GroupColor'], cmap=plt.cm.hot) 33 | 34 | if 'ColorConfidence' in result.keys(): 35 | conf_save_path = os.path.join(out_dir, 'confidence') 36 | mkdir_or_exist(conf_save_path) 37 | plt.imsave(osp.join(conf_save_path, image_name), result['ColorConfidence']) 38 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.20.0 2 | pandas 3 | matplotlib 4 | scikit-image 5 | scipy 6 | cffi 7 | imageio 8 | tqdm 9 | tensorflow 10 | tensorboardX 11 | yacs 12 | pyyaml 13 | easydict 14 | cython 15 | mmcv==1.3.5 16 | ipython 17 | thop 18 | torch>=1.7.1 19 | torchvision>=0.8.2 20 | torchaudio>=0.7.2 21 | pypng 22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages 2 | from setuptools import setup 3 | 4 | setup( 5 | name="dmb", 6 | version="1.0", 7 | author="Youmi, Minwell", 8 | description="dense matching benchmark in pytorch", 9 | packages=find_packages(exclude=("tests",)), 10 | ) 11 | 12 | -------------------------------------------------------------------------------- /tests/data/datasets/flow/test_flying_chairs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import unittest 4 | 5 | from mmcv import Config 6 | 7 | from dmb.data.datasets.flow.builder import build_flow_dataset as build_dataset 8 | 9 | 10 | class TestFlyingChairsDataset(unittest.TestCase): 11 | 12 | def setUp(self): 13 | config = dict( 14 | data=dict( 15 | train=dict( 16 | type='FlyingChairs', 17 | data_root='/home/youmin/data/OpticalFlow/FlyingChairs/', 18 | annfile='/home/youmin/data/annotations/FlyingChairs/test.json', 19 | input_shape=[256, 448], 20 | mean=[0.485, 0.456, 0.406], 21 | std=[0.229, 0.224, 0.225], 22 | ) 23 | ) 24 | ) 25 | cfg = Config(config) 26 | self.dataset = build_dataset(cfg, 'train') 27 | 28 | def test_anno_loader(self): 29 | print(self.dataset) 30 | print(self.dataset.data_list[111]) 31 | 32 | def test_get_item(self): 33 | for i in range(10): 34 | sample = self.dataset[i] 35 | assert isinstance(sample, dict) 36 | for k, v in sample.items(): 37 | if isinstance(v, torch.Tensor): 38 | print(k, ': with shape', v.shape) 39 | if isinstance(v, (tuple, list)): 40 | print(k, ': ', v) 41 | if v is None: 42 | print(k, ' is None') 43 | 44 | # @unittest.skip('just skip') 45 | def test_all_data(self): 46 | from tqdm import tqdm 47 | for idx in tqdm(range(len(self.dataset))): 48 | try: 49 | item = self.dataset[idx] 50 | except ValueError: 51 | print('Cannot find: {} -> {}'.format(idx, self.dataset.data_list[idx])) 52 | 53 | 54 | if __name__ == '__main__': 55 | unittest.main() 56 | -------------------------------------------------------------------------------- /tests/data/datasets/flow/yaml_to_json.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import os.path as osp 3 | import json 4 | from tqdm import tqdm 5 | 6 | if __name__ == '__main__': 7 | type = 'FlyingChairs' 8 | root = '/home/youmin/data/OpticalFlow/{}/'.format(type) 9 | annFile = '/home/youmin/data/annotations/{}/flyingchairs_val.yml'.format(type) 10 | saveFile = 
'/home/youmin/data/annotations/{}/eval.json'.format(type) 11 | data_list = [] 12 | with open(file=annFile, mode='r') as fp: 13 | data_list.extend(yaml.load(fp, Loader=yaml.BaseLoader)) 14 | 15 | Metas = [] 16 | for idx in range(len(data_list)): 17 | item = data_list[idx] 18 | meta = dict( 19 | left_image_path=item[0], 20 | right_image_path=item[1], 21 | ) 22 | if len(item) > 2: 23 | meta.update(flow_path=item[2]) 24 | Metas.append(meta) 25 | 26 | for meta in tqdm(Metas): 27 | for k, v in meta.items(): 28 | assert osp.exists(osp.join(root, v)), 'Metas: {} does not exist'.format(v) 29 | 30 | info_str = '{} Dataset contains:\n' \ 31 | ' {:5d} samples'.format(type, len(Metas)) 32 | print(info_str) 33 | 34 | print('Save to {}'.format(saveFile)) 35 | with open(file=saveFile, mode='w') as fp: 36 | json.dump(Metas, fp=fp) 37 | -------------------------------------------------------------------------------- /tests/data/datasets/stereo/test_kitti.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import unittest 4 | 5 | from mmcv import Config 6 | 7 | from dmb.data.datasets.stereo import build_stereo_dataset as build_dataset 8 | 9 | 10 | class TestKITTIDataset(unittest.TestCase): 11 | 12 | def setUp(self): 13 | config = dict( 14 | data=dict( 15 | test=dict( 16 | type='KITTI-2015', 17 | data_root='datasets/KITTI-2015/', 18 | annfile='datasets/KITTI-2015/annotations/full_eval.json', 19 | input_shape=[384, 1248], 20 | mean=[0.485, 0.456, 0.406], 21 | std=[0.229, 0.224, 0.225], 22 | toRAM=False, 23 | ) 24 | ) 25 | ) 26 | cfg = Config(config) 27 | self.dataset = build_dataset(cfg, 'test') 28 | 29 | 30 | 31 | 32 | def test_anno_loader(self): 33 | print(self.dataset) 34 | print('toRAM: ', self.dataset.toRAM) 35 | print(self.dataset.data_list[31]) 36 | 37 | def test_get_item(self): 38 | for i in range(10): 39 | sample = self.dataset[i] 40 | assert isinstance(sample, dict) 41 | print("*" * 20) 42 | print("Before scatter") 43 | print("*" * 20) 44 | for k, v in sample.items(): 45 | if isinstance(v, torch.Tensor): 46 | print(k, ': with shape', v.shape) 47 | if isinstance(v, (tuple, list)): 48 | print(k, ': ', v) 49 | if v is None: 50 | print(k, ' is None') 51 | 52 | 53 | if __name__ == '__main__': 54 | unittest.main() 55 | -------------------------------------------------------------------------------- /tests/data/datasets/stereo/test_scene_flow.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import unittest 4 | 5 | from mmcv import Config 6 | 7 | from dmb.data.datasets.stereo.builder import build_stereo_dataset as build_dataset 8 | 9 | 10 | class TestSceneFlowDataset(unittest.TestCase): 11 | 12 | def setUp(self): 13 | config = dict( 14 | data=dict( 15 | train=dict( 16 | type='SceneFlow', 17 | data_root='/home/youmin/data/StereoMatching/SceneFlow/', 18 | annfile='/home/youmin/data/annotations/SceneFlow/cleanpass_train.json', 19 | input_shape=[256, 512], 20 | mean=[0.485, 0.456, 0.406], 21 | std=[0.229, 0.224, 0.225], 22 | ) 23 | ) 24 | ) 25 | cfg = Config(config) 26 | self.dataset = build_dataset(cfg, 'train') 27 | 28 | def test_anno_loader(self): 29 | print(self.dataset) 30 | print(self.dataset.data_list[111]) 31 | 32 | def test_get_item(self): 33 | for i in range(10): 34 | sample = self.dataset[i] 35 | assert isinstance(sample, dict) 36 | for k, v in sample.items(): 37 | if 
isinstance(v, torch.Tensor): 38 | print(k, ': with shape', v.shape) 39 | if isinstance(v, (tuple, list)): 40 | print(k, ': ', v) 41 | if v is None: 42 | print(k, ' is None') 43 | 44 | def test_all_data(self): 45 | from tqdm import tqdm 46 | for idx in tqdm(range(len(self.dataset))): 47 | try: 48 | item = self.dataset[idx] 49 | except ValueError: 50 | print(idx, self.dataset.data_list[idx]) 51 | 52 | 53 | if __name__ == '__main__': 54 | unittest.main() 55 | -------------------------------------------------------------------------------- /tests/modeling/stereo/backbones/test_backbones.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import torch 4 | import torch.nn as nn 5 | from thop import profile 6 | from collections.abc import Iterable 7 | import time 8 | import unittest 9 | 10 | from dmb.modeling.stereo.backbones import build_backbone 11 | from mmcv import Config 12 | 13 | 14 | 15 | class testBackbones(unittest.TestCase): 16 | 17 | @classmethod 18 | def setUpClass(cls): 19 | cls.device = torch.device('cuda:1') 20 | config_path = '/home/zhixiang/youmin/projects/depth/public/' \ 21 | 'DenseMatchingBenchmark/configs/AcfNet/scene_flow_uniform.py' 22 | cls.cfg = Config.fromfile(config_path) 23 | cls.backbone = build_backbone(cls.cfg) 24 | cls.backbone.to(cls.device) 25 | 26 | cls.setUpTimeTestingClass() 27 | cls.avg_time = {} 28 | 29 | @classmethod 30 | def setUpTimeTestingClass(cls): 31 | cls.iters = 50 32 | 33 | h, w = 384, 1248 34 | leftImage = torch.rand(1, 3, h, w).to(cls.device) 35 | rightImage = torch.rand(1, 3, h, w).to(cls.device) 36 | 37 | cls.backbone_input = [leftImage, rightImage] 38 | 39 | print('Input preparation successful!') 40 | 41 | def timeTemplate(self, module, module_name, *args, **kwargs): 42 | with torch.cuda.device(self.device): 43 | torch.cuda.empty_cache() 44 | if isinstance(module, nn.Module): 45 | module.eval() 46 | torch.cuda.synchronize(self.device) 47 | start_time = time.time() 48 | 49 | for i in range(self.iters): 50 | with torch.no_grad(): 51 | if len(args) > 0: 52 | module(*args) 53 | if len(kwargs) > 0: 54 | module(**kwargs) 55 | torch.cuda.synchronize(self.device) 56 | end_time = time.time() 57 | avg_time = (end_time - start_time) / self.iters 58 | print('{} inference forward once takes {:.4f}ms, i.e. 
{:.2f}fps'.format(module_name, avg_time*1000, (1 / avg_time))) 59 | 60 | if isinstance(module, nn.Module): 61 | module.train() 62 | 63 | self.avg_time[module_name] = avg_time 64 | 65 | # @unittest.skip("demonstrating skipping") 66 | def test_0_OutputModel(self): 67 | print(self.backbone) 68 | 69 | # @unittest.skip("demonstrating skipping") 70 | def test_1_ModelTime(self): 71 | self.timeTemplate(self.backbone, 'Model', *self.backbone_input) 72 | 73 | 74 | if __name__ == '__main__': 75 | unittest.main() 76 | 77 | 78 | -------------------------------------------------------------------------------- /tests/modeling/stereo/losses/utils/test_disp2prob.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from mmcv import Config 4 | import time 5 | import unittest 6 | 7 | from dmb.modeling.stereo.losses.utils.disp2prob import LaplaceDisp2Prob, GaussianDisp2Prob, OneHotDisp2Prob 8 | 9 | 10 | class testLosses(unittest.TestCase): 11 | 12 | def setUp(self): 13 | self.device = torch.device("cuda:1") 14 | 15 | def testCase1Laplace(self): 16 | max_disp = 5 17 | start_disp = -2 18 | dilation = 2 19 | disp_sample=None 20 | variance = 2 21 | h, w = 3, 4 22 | 23 | gtDisp = torch.rand(1, 1, h, w) * max_disp + start_disp 24 | 25 | gtDisp = gtDisp.to(self.device) 26 | gtDisp.requires_grad = True 27 | print('*' * 60) 28 | print('Ground Truth Disparity:') 29 | print(gtDisp) 30 | 31 | 32 | print('*' * 60) 33 | print('Generated disparity probability volume:') 34 | prob_volume = LaplaceDisp2Prob( 35 | gtDisp, max_disp=max_disp, variance=variance, 36 | start_disp=start_disp, dilation=dilation, disp_sample=disp_sample 37 | ).getProb() 38 | 39 | idx = 0 40 | for i in range(start_disp, max_disp + start_disp, dilation): 41 | print('Disparity {}:\n {}'.format(i, prob_volume[:, idx, ])) 42 | idx += 1 43 | 44 | def testCase2Laplace(self): 45 | max_disp = 5 46 | start_disp = -2 47 | variance = 2 48 | h, w = 3, 4 49 | disp_sample = torch.Tensor([-2, 0, 2]).repeat(1, h, w, 1).permute(0, 3, 1, 2).contiguous() 50 | 51 | 52 | gtDisp = torch.rand(1, 1, h, w) * max_disp + start_disp 53 | 54 | gtDisp = gtDisp.to(self.device) 55 | gtDisp.requires_grad = True 56 | print('*' * 60) 57 | print('Ground Truth Disparity:') 58 | print(gtDisp) 59 | 60 | print('*' * 60) 61 | print('Generated disparity probability volume:') 62 | prob_volume = LaplaceDisp2Prob( 63 | gtDisp, max_disp=max_disp, start_disp=start_disp, variance=variance, disp_sample=disp_sample 64 | ).getProb() 65 | 66 | idx = 0 67 | for i in range(disp_sample.shape[1]): 68 | print('Disparity {}:\n {}'.format(i, prob_volume[:, idx, ])) 69 | idx += 1 70 | 71 | if __name__ == '__main__': 72 | print('test probability volume!') 73 | unittest.main() 74 | 75 | -------------------------------------------------------------------------------- /tools/demo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python demo.py \ 3 | --config-path ../configs/AcfNet/scene_flow_adaptive.py \ 4 | --checkpoint-path /data/exps/AcfNet/scene_flow_adaptive/epoch_20.pth \ 5 | --data-root ./demo_data/ \ 6 | --device cuda:0 \ 7 | --log-dir /data/exps/AcfNet/scene_flow_adaptive/output/ \ 8 | --pad-to-shape 544 960 \ 9 | --scale-factor 1.0 \ 10 | --disp-div-factor 1.0 \ 11 | -------------------------------------------------------------------------------- /tools/demo_data/disparity/left/0.pfm: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/tools/demo_data/disparity/left/0.pfm -------------------------------------------------------------------------------- /tools/demo_data/disparity/left/4.pfm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/tools/demo_data/disparity/left/4.pfm -------------------------------------------------------------------------------- /tools/demo_data/disparity/right/0.pfm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/tools/demo_data/disparity/right/0.pfm -------------------------------------------------------------------------------- /tools/demo_data/disparity/right/4.pfm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/tools/demo_data/disparity/right/4.pfm -------------------------------------------------------------------------------- /tools/demo_data/images/left/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/tools/demo_data/images/left/0.png -------------------------------------------------------------------------------- /tools/demo_data/images/left/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/tools/demo_data/images/left/4.png -------------------------------------------------------------------------------- /tools/demo_data/images/right/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/tools/demo_data/images/right/0.png -------------------------------------------------------------------------------- /tools/demo_data/images/right/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/tools/demo_data/images/right/4.png -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | NGPUS=$1 5 | CFG_PATH=$2 6 | PORT=$3 7 | SHOW=$4 8 | 9 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch --master_port $PORT --nproc_per_node=$NGPUS \ 10 | test.py $CFG_PATH --launcher pytorch --validate --gpus $NGPUS --show $SHOW 11 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | NGPUS=$1 5 | CFG_PATH=$2 6 | PORT=$3 7 | 8 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch --master_port $PORT --nproc_per_node=$NGPUS \ 9 | train.py $CFG_PATH --launcher pytorch --validate --gpus $NGPUS 10 
| --------------------------------------------------------------------------------
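A usage note for the launch scripts above; the GPU count, port, and config path below are illustrative, not commands taken from the repository docs:

# dist_train.sh takes <NGPUS> <CFG_PATH> <PORT>; dist_test.sh additionally takes
# <SHOW>, the directory handed to test.py via --show. For example:
#   bash dist_train.sh 4 ../configs/PSMNet/scene_flow.py 10010
#   bash dist_test.sh 4 ../configs/PSMNet/scene_flow.py 10010 /path/to/show_dir
# Note that both scripts pin CUDA_VISIBLE_DEVICES=0,1,2,3, so edit that line
# before running on more, or different, GPUs.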