├── .gitignore ├── DATA.md ├── GETTING_STARTED.md ├── INSTALL.md ├── INSTALL.sh ├── LICENSE ├── README.md ├── configs ├── AcfNet │ ├── ResultOfAcfNet.md │ ├── kitti_2015_adaptive.py │ ├── kitti_2015_uniform.py │ ├── scene_flow_adaptive.py │ └── scene_flow_uniform.py ├── AnyNet │ ├── ResultOfAnyNet.md │ └── scene_flow.py ├── DeepPruner │ ├── ResultOfDeepPruner.md │ ├── scene_flow_4x.py │ └── scene_flow_8x.py ├── GCNet │ ├── kitti_2015.py │ └── scene_flow.py ├── PSMNet │ ├── ResultOfPSMNet.md │ ├── kitti_2015.py │ └── scene_flow.py └── StereoNet │ ├── ResultOfStereoNet.md │ ├── scene_flow_8x_2stage.py │ └── scene_flow_8x_4stage.py ├── dmb ├── __init__.py ├── apis │ ├── __init__.py │ ├── inference.py │ └── train.py ├── data │ ├── README.md │ ├── __init__.py │ ├── datasets │ │ ├── __init__.py │ │ ├── evaluation │ │ │ ├── __init__.py │ │ │ ├── flow │ │ │ │ ├── __init__.py │ │ │ │ ├── eval.py │ │ │ │ ├── eval_hooks.py │ │ │ │ └── pixel_error.py │ │ │ └── stereo │ │ │ │ ├── __init__.py │ │ │ │ ├── eval.py │ │ │ │ ├── eval_hooks.py │ │ │ │ └── pixel_error.py │ │ ├── flow │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── builder.py │ │ │ └── flying_chairs │ │ │ │ ├── __init__.py │ │ │ │ └── base.py │ │ ├── stereo │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── builder.py │ │ │ ├── kitti │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── kitti_2012.py │ │ │ │ └── kitti_2015.py │ │ │ └── scene_flow │ │ │ │ ├── __init__.py │ │ │ │ └── base.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── load_disp.py │ │ │ └── load_flow.py │ ├── loaders │ │ ├── __init__.py │ │ ├── builder.py │ │ └── samplers.py │ └── transforms │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── flow_trans.py │ │ ├── stereo_trans.py │ │ └── transforms.py ├── modeling │ ├── __init__.py │ ├── flow │ │ ├── __init__.py │ │ └── models │ │ │ └── __init__.py │ └── stereo │ │ ├── __init__.py │ │ ├── backbones │ │ ├── AnyNet.py │ │ ├── DeepPruner.py │ │ ├── GCNet.py │ │ ├── PSMNet.py │ │ ├── StereoNet.py │ │ ├── __init__.py │ │ ├── backbones.py │ │ └── utils │ │ │ ├── DenseASPP.py │ │ │ └── __init__.py │ │ ├── cmn │ │ ├── __init__.py │ │ ├── cmn.py │ │ └── loss.py │ │ ├── conf_measure │ │ ├── __init__.py │ │ ├── cal_conf.py │ │ ├── conf_net.py │ │ └── gen_conf.py │ │ ├── cost_processors │ │ ├── AnyNet.py │ │ ├── DeepPruner.py │ │ ├── __init__.py │ │ ├── aggregators │ │ │ ├── AcfNet.py │ │ │ ├── AnyNet.py │ │ │ ├── DeepPruner.py │ │ │ ├── GCNet.py │ │ │ ├── PSMNet.py │ │ │ ├── StereoNet.py │ │ │ ├── __init__.py │ │ │ └── builder.py │ │ ├── builder.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── cat_fms.py │ │ │ ├── correlation1d_cost.py │ │ │ ├── cost_norm.py │ │ │ ├── dif_fms.py │ │ │ ├── hourglass.py │ │ │ ├── hourglass_2d.py │ │ │ └── hw_hourglass.py │ │ ├── disp_predictors │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── faster_soft_argmin.py │ │ ├── local_soft_argmin.py │ │ └── soft_argmin.py │ │ ├── disp_refinement │ │ ├── AnyNet.py │ │ ├── DeepPruner.py │ │ ├── StereoNet.py │ │ ├── __init__.py │ │ ├── builder.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── edge_aware.py │ │ │ └── min_warp_error.py │ │ ├── disp_samplers │ │ ├── DeepPruner.py │ │ ├── __init__.py │ │ ├── builder.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ └── patch_match.py │ │ ├── layers │ │ ├── __init__.py │ │ ├── basic_layers.py │ │ ├── bilateral_filter.py │ │ ├── cspn.py │ │ ├── dilated_hourglass.py │ │ ├── inverse_warp.py │ │ └── inverse_warp_3d.py │ │ ├── losses │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── conf_nll_loss.py │ │ ├── gerf_loss.py │ │ ├── 
inverse_warp_loss.py │ │ ├── relative_loss.py │ │ ├── smooth_l1_loss.py │ │ ├── stereo_focal_loss.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── disp2prob.py │ │ │ ├── quantile_loss.py │ │ │ └── ssim.py │ │ ├── models │ │ ├── AnyNet.py │ │ ├── DeepPruner.py │ │ ├── __init__.py │ │ └── general_stereo_model.py │ │ └── registry.py ├── ops │ ├── __init__.py │ └── spn │ │ ├── __init__.py │ │ ├── functions │ │ ├── __init__.py │ │ └── gaterecurrent2dnoind.py │ │ ├── gaterecurrent2dnoind_cuda.egg-info │ │ ├── PKG-INFO │ │ ├── SOURCES.txt │ │ ├── dependency_links.txt │ │ └── top_level.txt │ │ ├── modules │ │ ├── __init__.py │ │ └── gaterecurrent2dnoind.py │ │ ├── setup.py │ │ └── src │ │ ├── gaterecurrent2dnoind_cuda.cpp │ │ ├── gaterecurrent2dnoind_cuda.h │ │ ├── gaterecurrent2dnoind_kernel.cu │ │ └── gaterecurrent2dnoind_kernel.h ├── utils │ ├── __init__.py │ ├── collect_env.py │ ├── dist_utils.py │ ├── env.py │ ├── fp16 │ │ ├── __init__.py │ │ ├── decorators.py │ │ ├── hooks.py │ │ └── utils.py │ ├── registry.py │ ├── solver.py │ ├── tensorboard_logger.py │ └── text_logger.py └── visualization │ ├── __init__.py │ ├── flow │ ├── __init__.py │ ├── save_result.py │ ├── show_result.py │ ├── vis.py │ └── vis_hooks.py │ └── stereo │ ├── __init__.py │ ├── save_result.py │ ├── show_result.py │ ├── sparsification_plot.py │ ├── vis.py │ └── vis_hooks.py ├── requirements.txt ├── setup.py ├── tests ├── data │ └── datasets │ │ ├── flow │ │ ├── test_flying_chairs.py │ │ └── yaml_to_json.py │ │ └── stereo │ │ ├── test_kitti.py │ │ └── test_scene_flow.py └── modeling │ └── stereo │ ├── backbones │ └── test_backbones.py │ ├── cost_processors │ └── utils │ │ └── test_cat_fms.py │ ├── disp_predictors │ └── test_disp_predictors.py │ ├── losses │ ├── test_stereo_focal_loss.py │ └── utils │ │ └── test_disp2prob.py │ └── models │ └── test_model.py └── tools ├── UI.py ├── UI.ui ├── datasets ├── gen_kitti2015_anns.py └── gen_sceneflow_anns.py ├── demo.py ├── demo.sh ├── demo_data ├── disparity │ ├── left │ │ ├── 0.pfm │ │ └── 4.pfm │ └── right │ │ ├── 0.pfm │ │ └── 4.pfm └── images │ ├── left │ ├── 0.png │ └── 4.png │ └── right │ ├── 0.png │ └── 4.png ├── dist_test.sh ├── dist_train.sh ├── test.py ├── train.py └── view_cost.py /.gitignore: -------------------------------------------------------------------------------- 1 | # compilation and distribution 2 | __pycache__ 3 | _ext 4 | *.pyc 5 | *.so 6 | dmb.egg-info/ 7 | build/ 8 | dist/ 9 | 10 | # pytorch/python/numpy formats 11 | *.pth 12 | *.pkl 13 | *.npy 14 | 15 | # ipython/jupyter notebooks 16 | *.ipynb 17 | **/.ipynb_checkpoints/ 18 | 19 | # Editor temporaries 20 | *.swn 21 | *.swo 22 | *.swp 23 | *~ 24 | 25 | # Pycharm editor settings 26 | .idea 27 | 28 | # vscode editor settings 29 | .vscode 30 | 31 | # MacOS 32 | .DS_Store 33 | -------------------------------------------------------------------------------- /DATA.md: -------------------------------------------------------------------------------- 1 | ### Prepare Scene Flow and KITTI dataset. 2 | 3 | It is recommended to symlink the dataset root to `$DenseMatchingBenchmark/datasets/`. 
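For example, assuming an existing dataset copy lives under `/data` (this source path is illustrative; substitute your own):

```bash
# link existing dataset folders into the repository's dataset root
mkdir -p $DenseMatchingBenchmark/datasets
ln -s /data/SceneFlow $DenseMatchingBenchmark/datasets/SceneFlow
ln -s /data/KITTI-2015 $DenseMatchingBenchmark/datasets/KITTI-2015
```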
Related tools for generating the JSON annotation files can be found in [tools](tools/datasets). 4 | 5 | ``` 6 | ├── KITTI-2012 7 | │   └── data_stereo_flow 8 | │   ├── testing 9 | │   └── training 10 | ├── KITTI-2015 11 | │   ├── calib 12 | │   ├── devkit 13 | │   ├── testing 14 | │   │   ├── image_2 15 | │   │   └── image_3 16 | │   └── training 17 | │   ├── disp_noc_0 18 | │   ├── disp_noc_1 19 | │   ├── image_2 20 | │   └── image_3 21 | └── SceneFlow 22 |   ├── calib 23 |    ├── driving 24 |    │   ├── disparity 25 |    │   ├── frames_cleanpass 26 |    │   └── frames_finalpass 27 |    ├── flyingthings3d 28 |    │   ├── disparity 29 |    │   ├── frames_cleanpass 30 |    │   └── frames_finalpass 31 |    └── Monkaa 32 |    ├── disparity 33 |    ├── frames_cleanpass 34 |    └── frames_finalpass 35 | 36 | 37 | ``` 38 | 39 | ### Prepare visualization dataset. 40 | 41 | We enable evaluation and visualization for each epoch. Here, visualization means rendering the estimated results (e.g., disparity maps) so they can be inspected. 42 | 43 | It is recommended to download the visualization data we prepared; alternatively, you can prepare it yourself. 44 | 45 | #### How To Use 46 | 47 | To use it, simply set the `data=dict(vis=...)` parameter in the config file to a valid path. 48 | 49 | #### Download Links 50 | The visualization data can be downloaded from: 51 | 1. Baidu YunPan: https://pan.baidu.com/s/1J7OBum7-kTFQV3Sbr3qT4w password: 0q8y 52 | 2. Google Drive: https://drive.google.com/open?id=1oroPkS9bYBULvRW2olpA2wLgKSxU9Ovl 53 | 54 | 55 | ``` 56 | visualization_data 57 | ├── KITTI-2015 58 | │ ├── annotations 59 | │ ├── calib 60 | │ ├── disparity 61 | │ ├── genVisKITTI2015AnnoFile.py 62 | │ ├── genVisKITTIVOAnnoFile.py 63 | │ ├── images 64 | │ └── velodyne_points 65 | └── SceneFlow 66 | ├── __init__.py 67 | ├── annotations 68 | ├── disparity 69 | ├── genVisSFAnnoFile.py 70 | ├── images 71 | ├── occ 72 | └── readme.txt 73 | ``` 74 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 3 | ### Requirements: 4 | - PyTorch 1.7.1+, Python 3.8+, CUDA 11.0+ 5 | - mmcv==1.3.5 6 | 7 | We have verified that the code can train/val/test normally with PyTorch 1.7.1. However, with limited GPU resources, the experiments have not been re-run. 8 | 9 | So, if you observe worse results, you can check out commit 177c56ca1952f54d28e6073afa2c16981113a2af: 10 | ```bash 11 | cd $DenseMatchingBenchmark 12 | git checkout 177c56ca1952f54d28e6073afa2c16981113a2af 13 | ``` 14 | 15 | ### Option 1: Step-by-step installation 16 | 17 | a. Create a conda virtual environment and activate it. Then install the required packages. 18 | 19 | 20 | ```bash 21 | # first, make sure that your conda is set up properly with the right environment 22 | # for that, check that `which conda`, `which pip` and `which python` point to the 23 | # right paths. From a clean conda env, this is what you need to do 24 | 25 | conda create --name dense_matching_benchmark python=3.8 26 | conda activate dense_matching_benchmark 27 | 28 | # this installs the right pip and dependencies for the fresh python 29 | conda install ipython 30 | conda install pip 31 | 32 | # install required packages from requirements.txt 33 | pip install -r requirements.txt 34 | ``` 35 | 36 | b. Install PyTorch stable or nightly and torchvision following the [official instructions](https://pytorch.org/). 37 | 38 | c0.
Install detectron2 39 | ```bash 40 | python -m pip install detectron2 -f \ 41 | https://dl.fbaipublicfiles.com/detectron2/wheels/cu110/torch1.7/index.html 42 | ``` 43 | 44 | c1. Install apex 45 | ```bash 46 | # optional step: 47 | export CUDA_HOME=/usr/local/cuda-x.x/ 48 | # where x.x corresponds to the CUDA version used to install pytorch 49 | 50 | git clone https://github.com/NVIDIA/apex.git 51 | cd apex 52 | python setup.py install --cuda_ext --cpp_ext 53 | ``` 54 | *Potential ISSUE*: [compile error](https://github.com/NVIDIA/apex/issues/802#issuecomment-618699214) 55 | ``` 56 | # optional step: 57 | git checkout f3a960f80244cf9e80558ab30f7f7e8cbf03c0a0 58 | ``` 59 | 60 | c2. Install [spatial-correlation-sampler](https://github.com/ClementPinard/Pytorch-Correlation-extension) 61 | ```bash 62 | git clone https://github.com/ClementPinard/Pytorch-Correlation-extension 63 | cd Pytorch-Correlation-extension 64 | git checkout fix_1.7 65 | python setup.py install 66 | ``` 67 | 68 | 69 | d. Clone the DenseMatchingBenchmark repository. 70 | 71 | ```bash 72 | git clone https://github.com/DeepMotionAIResearch/DenseMatchingBenchmark.git 73 | cd DenseMatchingBenchmark 74 | ``` 75 | 76 | e. Install DenseMatchingBenchmark (other dependencies will be installed as needed). 77 | ```bash 78 | # libs include: dmb, spn 79 | 80 | # the $1 can be: 'all', 'dmb', 'spn' 81 | # => install all libs or a specific lib, e.g. dmb 82 | 83 | # the $2 can be: 'install' 84 | # => if 'install' is given, the libs will be installed into site-packages 85 | # => if not given, the libs will be installed with symbolic links, 86 | # => so that you can modify the files if you want, without needing to re-build 87 | 88 | bash INSTALL.sh $1 $2 89 | 90 | # recommended install instruction: 91 | 92 | bash INSTALL.sh all 93 | 94 | ``` 95 | 96 | ### Prepare data 97 | 98 | For data preparation, please refer to [DATA.md](DATA.md) 99 | 100 | 101 | 102 | ### Notice 103 | You can run `python(3) setup.py develop` or `pip install -e .` to install DenseMatchingBenchmark if you want to make modifications to it frequently. 104 | 105 | If there is more than one copy of DenseMatchingBenchmark on your machine, and you want to switch between them, 106 | please insert the following code into the main file 107 | ```python 108 | import os.path as osp 109 | import sys 110 | sys.path.insert(0, osp.join(osp.dirname(osp.abspath(__file__)), '../')) 111 | ``` 112 | or run the following command in the terminal from the corresponding folder. 113 | ```shell 114 | export PYTHONPATH=`pwd`:$PYTHONPATH 115 | ``` 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /INSTALL.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [[ $1 == 'spn' || $1 == 'all' ]] 4 | then 5 | 6 | cd dmb/ops/spn/ 7 | 8 | python setup.py clean 9 | rm -rf build 10 | rm -r dist 11 | rm -r *.egg-info 12 | 13 | if [[ $2 == 'install' ]] 14 | then 15 | python setup.py install 16 | else 17 | python setup.py build develop 18 | fi 19 | 20 | cd ../../../ 21 | 22 | echo "*********************************************************************" 23 | echo " SPN installed!"
24 | echo "*********************************************************************" 25 | 26 | fi 27 | 28 | 29 | if [[ $1 == 'dmb' || $1 == 'all' ]] 30 | then 31 | 32 | python setup.py clean 33 | rm -r build 34 | rm -r dist 35 | rm -r *.egg-info 36 | 37 | if [[ $2 == 'install' ]] 38 | then 39 | python setup.py install 40 | else 41 | python setup.py build develop 42 | fi 43 | 44 | echo "*********************************************************************" 45 | echo " dmb installed!" 46 | echo "*********************************************************************" 47 | 48 | fi 49 | 50 | 51 | echo "*********************************************************************" 52 | echo " Dense Matching Benchmark Installed!" 53 | echo "*********************************************************************" 54 | 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Facebook 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | 24 | -------------------------------------------------------------------------------- /configs/AnyNet/ResultOfAnyNet.md: -------------------------------------------------------------------------------- 1 | # Result of AnyNet 2 | 3 | ## Model Info 4 | 5 | ``` 6 | @article{wang2018anytime, 7 | title={Anytime Stereo Image Depth Estimation on Mobile Devices}, 8 | author={Wang, Yan and Lai, Zihang and Huang, Gao and Wang, Brian H. and Van Der Maaten, Laurens and Campbell, Mark and Weinberger, Kilian Q}, 9 | journal={arXiv preprint arXiv:1810.11408}, 10 | year={2018} 11 | } 12 | ``` 13 | 14 | * Note: Test on GTX1080Ti, with resolution 384x1248. 
15 | 16 | | Model Name | FLOPS | Parameters | FPS | Time(ms) | 17 | |:---------------------:|:---------:|:----------:|:----:|:--------:| 18 | | AnyNet | 1.476G | 46.987K | 19 | 20 | 21 | 22 | ## Experiment 23 | 24 | 25 | **hints** 26 | 27 | * batch size: n * m, i.e., n GPUs with m samples per GPU 28 | * pass: clean pass or final pass of the Scene Flow dataset, default clean pass 29 | * weight init: whether to initialize the convolution/bn layers when training from scratch, default no initialization 30 | * synced bn: whether to use synced bn provided by apex, default False 31 | * float16: whether to use mixed-precision training at level O1 provided by apex, default False 32 | * scale loss: the loss scale factor when using apex 33 | * time: total time consumed, including training and evaluation, in the format: x h(hour) y m(minute) 34 | * EPE: end-point error for SceneFlow 35 | * D1(all): 3PE(px) & 5% for KITTI 2015 36 | 37 | 38 | ### SceneFlow 39 | 40 | RMSProp, lr(20 epochs) schedule: 1-20 with lr\*1 41 | 42 | - Inference with 1 GPU takes a long time 43 | - Although we train for 20 epochs, we find that epoch 12 gives the best result 44 | 45 | | lr |batch size |weight init| synced bn | float16 |loss scale | EPE(px)| time | BaiDuYun | GoogleDrive | 46 | |:-----:|:---------:|:---------:|:---------:|:---------:|:---------:|:------:|:------:|:--------:|:-----------:| 47 | | 0.0005| 1*6 | ✗ | ✓ | ✗ | ✗ | 3.190 | 14h12m | [link][1], pw: dtff| [link][2] | 48 | 49 | 50 | 51 | ### KITTI-2015 52 | 53 | | lr |batch size |weight init| synced bn | float16 |loss scale | D1(all) | time | BaiDuYun | GoogleDrive | 54 | |:-----:|:---------:|:---------:|:---------:|:---------:|:---------:|:--------:|:------:|:--------:|:-----------:| 55 | | 0.001 | 1*6 | ✗ | ✓ | ✗ | ✗ | 56 | 57 | 58 | 59 | [1]: https://pan.baidu.com/s/10bP0TXCXHcdIg49Fv13H7Q 60 | [2]: https://drive.google.com/open?id=1_5hBOfKwg_TnMFvZr4qEkU0bEwRoRlxL 61 | -------------------------------------------------------------------------------- /configs/DeepPruner/ResultOfDeepPruner.md: -------------------------------------------------------------------------------- 1 | # Result of DeepPruner 2 | 3 | ## Model Info 4 | 5 | ``` 6 | @inproceedings{Duggal2019ICCV, 7 | title = {DeepPruner: Learning Efficient Stereo Matching via Differentiable PatchMatch}, 8 | author = {Shivam Duggal and Shenlong Wang and Wei-Chiu Ma and Rui Hu and Raquel Urtasun}, 9 | booktitle = {ICCV}, 10 | year = {2019} 11 | } 12 | ``` 13 | 14 | * Note: Test on GTX1080Ti, with resolution 384x1280.
15 | 16 | | Model Name | FLOPS | Parameters | FPS | Time(ms) | 17 | |:---------------------:|:---------:|:----------:|:----:|:--------:| 18 | | DeepPruner-4x | 472.125G | 7.390M | 3.42| 292.4 | 19 | | DeepPruner-8x | 194.181G | 7.470M | 7.67| 130.4 | 20 | 21 | 22 | 23 | ## Experiment 24 | 25 | 26 | **hints** 27 | 28 | * batch size: n * m, i.e., n GPUs with m samples per GPU 29 | * pass: clean pass or final pass of the Scene Flow dataset, default clean pass 30 | * weight init: whether to initialize the convolution/bn layers when training from scratch, default no initialization 31 | * synced bn: whether to use synced bn provided by apex, default False 32 | * float16: whether to use mixed-precision training at level O1 provided by apex, default False 33 | * scale loss: the loss scale factor when using apex 34 | * time: total time consumed, including training and evaluation, in the format: x h(hour) y m(minute) 35 | * EPE: end-point error for SceneFlow 36 | * D1(all): 3PE(px) & 5% for KITTI 2015 37 | 38 | 39 | ### SceneFlow 40 | 41 | RMSProp, lr(15 epochs) schedule: 1-10 with lr\*1 42 | 43 | 44 | | Model Name | lr |batch size |weight init| synced bn | float16 |loss scale | EPE(px)| time | BaiDuYun | GoogleDrive | 45 | |:---------------------:|:-----:|:---------:|:---------:|:---------:|:---------:|:---------:|:------:|:-----:|:--------:|:-----------:| 46 | | DeepPruner-4x | 0.001 | 4*2 | ✗ | ✓ | ✗ | ✗ | 47 | 48 | 49 | 50 | ### KITTI-2015 51 | 52 | | lr |batch size | synced bn |loss scale | 3PE(px) & 5% | 53 | |:-----:|:---------:|:---------:|:---------:|:------------:| 54 | | 0.001 | 4*3 | ✓ | ✗ | 55 | -------------------------------------------------------------------------------- /configs/PSMNet/ResultOfPSMNet.md: -------------------------------------------------------------------------------- 1 | # Result of PSMNet 2 | 3 | ``` 4 | @inproceedings{chang2018pyramid, 5 | title={Pyramid Stereo Matching Network}, 6 | author={Chang, Jia-Ren and Chen, Yong-Sheng}, 7 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 8 | pages={5410--5418}, 9 | year={2018} 10 | } 11 | ``` 12 | 13 | ## Model Info 14 | 15 | * Note: Test on GTX1080Ti, with resolution 384x1248.
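The FPS and Time(ms) entries in the table below come from repeated forward passes at the stated resolution. A minimal timing sketch is given here for reference; the warm-up/iteration counts and the assumption that the model consumes a left/right image pair on the GPU are ours, not the exact harness used for the table:

```python
import time
import torch

@torch.no_grad()
def measure_speed(model, iters=100, shape=(1, 3, 384, 1248)):
    # dummy stereo pair at the benchmark resolution
    left = torch.randn(shape, device='cuda')
    right = torch.randn(shape, device='cuda')
    model = model.cuda().eval()
    for _ in range(10):  # warm-up, lets cuDNN select kernels
        model(left, right)
    torch.cuda.synchronize()
    start = time.time()
    for _ in range(iters):
        model(left, right)
    torch.cuda.synchronize()  # wait for all kernels before reading the clock
    per_iter = (time.time() - start) / iters
    return 1.0 / per_iter, per_iter * 1000.0  # FPS, time in ms
```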
16 | 17 | | Model Name | FLOPS | Parameters | FPS | Time(ms) | 18 | |:---------------------:|:---------:|:----------:|:----:|:--------:| 19 | | PSMNet | 938.186G | 5.225M | 1.67 | 599.2 | 20 | 21 | 22 | 23 | ## Experiment 24 | 25 | 26 | **hints** 27 | 28 | * batch size: n * m, i.e., n GPUs with m samples per GPU 29 | * pass: clean pass or final pass of the Scene Flow dataset, default clean pass 30 | * weight init: whether to initialize the convolution/bn layers when training from scratch, default no initialization 31 | * synced bn: whether to use synced bn provided by apex, default False 32 | * float16: whether to use mixed-precision training at level O1 provided by apex, default False 33 | * scale loss: the loss scale factor when using apex 34 | * time: total time consumed, including training and evaluation, in the format: x h(hour) y m(minute) 35 | * EPE: end-point error for SceneFlow 36 | * D1(all): 3PE(px) & 5% for KITTI 2015 37 | 38 | 39 | ### SceneFlow 40 | 41 | RMSProp, lr(10 epochs) schedule: 1-10 with lr\*1 42 | 43 | 44 | | lr |batch size |weight init| synced bn | float16 |loss scale | EPE(px)| time | BaiDuYun | GoogleDrive | 45 | |:-----:|:---------:|:---------:|:---------:|:---------:|:---------:|:------:|:------:|:--------:|:-----------:| 46 | | 0.001 | 4*3 | ✗ | ✓ | ✗ | ✗ | 1.112 | 22h44m | [link][1], pw: 0kxt| [link][3] | 47 | 48 | 49 | 50 | ### KITTI-2015 51 | 52 | | lr |batch size |weight init| synced bn | float16 |loss scale | D1(all) | time | BaiDuYun | GoogleDrive | 53 | |:-----:|:---------:|:---------:|:---------:|:---------:|:---------:|:--------:|:------:|:--------:|:-----------:| 54 | | 0.001 | 4*3 | ✗ | ✓ | ✗ | ✗ | 2.33 | 15h15m | [link][2], pw: odt8| [link][4] | 55 | 56 | 57 | 58 | [1]: https://pan.baidu.com/s/1e693uEuNK6uAg3OZstDJVQ 59 | [2]: https://pan.baidu.com/s/1XnrtztXY9og3-JtBrLEGyA 60 | [3]: https://drive.google.com/open?id=1aPJiGkt9P2Lt0UCcM817YjONV2DRDEBH 61 | [4]: https://drive.google.com/drive/folders/1T__OTsViq5tkstm7jKV6p9wSs96EYUGw?usp=sharing 62 | -------------------------------------------------------------------------------- /configs/StereoNet/ResultOfStereoNet.md: -------------------------------------------------------------------------------- 1 | # Result of StereoNet 2 | 3 | ## Model Info 4 | 5 | ``` 6 | @inproceedings{khamis2018stereonet, 7 | title={Stereonet: Guided hierarchical refinement for real-time edge-aware depth prediction}, 8 | author={Khamis, Sameh and Fanello, Sean and Rhemann, Christoph and Kowdle, Adarsh and Valentin, Julien and Izadi, Shahram}, 9 | booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, 10 | pages={573--590}, 11 | year={2018} 12 | } 13 | 14 | @inproceedings{zhang2018activestereonet, 15 | title={Activestereonet: End-to-end self-supervised learning for active stereo systems}, 16 | author={Zhang, Yinda and Khamis, Sameh and Rhemann, Christoph and Valentin, Julien and Kowdle, Adarsh and Tankovich, Vladimir and Schoenberg, Michael and Izadi, Shahram and Funkhouser, Thomas and Fanello, Sean}, 17 | booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, 18 | pages={784--801}, 19 | year={2018} 20 | } 21 | 22 | ``` 23 | 24 | * Note: Test on GTX1080Ti, with resolution 384x1248.
25 | 26 | | Model Name | FLOPS | Parameters | FPS | Time(ms) | 27 | |:---------------------:|:---------:|:----------:|:----:|:--------:| 28 | | StereoNet-8X-2stage | 78.512G | 399.066K | 19.17| 52.2 | 29 | | StereoNet-8X-4stage | 186.719G | 624.860K | 8.54| 117.0 | 30 | 31 | 32 | 33 | ## Experiment 34 | 35 | 36 | **hints** 37 | 38 | * batch size: n * m, i.e., n GPUs with m samples per GPU 39 | * pass: clean pass or final pass of the Scene Flow dataset, default clean pass 40 | * weight init: whether to initialize the convolution/bn layers when training from scratch, default no initialization 41 | * synced bn: whether to use synced bn provided by apex, default False 42 | * float16: whether to use mixed-precision training at level O1 provided by apex, default False 43 | * scale loss: the loss scale factor when using apex 44 | * time: total time consumed, including training and evaluation, in the format: x h(hour) y m(minute) 45 | * EPE: end-point error for SceneFlow 46 | * D1(all): 3PE(px) & 5% for KITTI 2015 47 | 48 | 49 | ### SceneFlow 50 | 51 | RMSProp, lr(11 epochs) schedule: 1-11 with lr\*1 52 | 53 | - Inference with 1 GPU takes a long time 54 | 55 | | Model Name | lr |batch size |weight init| synced bn | float16 |loss scale | EPE(px)| time | BaiDuYun | GoogleDrive | 56 | |:---------------------:|:-----:|:---------:|:---------:|:---------:|:---------:|:---------:|:------:|:------:|:--------:|:-----------:| 57 | | StereoNet-8X-2stage | 0.001 | 1*4 | ✗ | ✓ | ✗ | ✗ | 1.533 | 40h56m |[link][1], pw: rza0 | [link][2]| 58 | | StereoNet-8X-4stage | 0.001 | 1*4 | ✗ | ✓ | ✗ | ✗ | 1.329 | 143h45m|[link][3], pw: gpjm | [link][4]| 59 | 60 | 61 | 62 | ### KITTI-2015 63 | 64 | | lr |batch size | synced bn |loss scale | 3PE(px) & 5% | 65 | |:-----:|:---------:|:---------:|:---------:|:------------:| 66 | | 0.001 | 1*4 | ✓ | ✗ | 67 | 68 | 69 | [1]: https://pan.baidu.com/s/1cuvjEETJUnpnxy_pFqiTRw 70 | [2]: https://drive.google.com/open?id=1cuXzQDfQ28a9gmSJichaIGYsEITGp_Qh 71 | [3]: https://pan.baidu.com/s/13DOhuuvqvNL9ksg5_85GEw 72 | [4]: https://drive.google.com/open?id=10TYF5SqN26-GsVIf2ytXALbNMBgOLH_1 73 | -------------------------------------------------------------------------------- /dmb/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/dmb/__init__.py -------------------------------------------------------------------------------- /dmb/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .train import train_matcher 2 | from .inference import inference_stereo 3 | -------------------------------------------------------------------------------- /dmb/data/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/dmb/data/README.md -------------------------------------------------------------------------------- /dmb/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .loaders import build_data_loader 2 | -------------------------------------------------------------------------------- /dmb/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .stereo import build_stereo_dataset 2 | from .stereo import SceneFlowDataset, Kitti2012Dataset, Kitti2015Dataset 3 |
4 | from .flow import build_flow_dataset 5 | from .flow import FlyingChairsDataset 6 | 7 | def build_dataset(cfg, type): 8 | task = cfg.get('task', 'stereo') 9 | if task == 'stereo': 10 | return build_stereo_dataset(cfg, type) 11 | elif task == 'flow': 12 | return build_flow_dataset(cfg, type) 13 | else: 14 | raise NotImplementedError 15 | -------------------------------------------------------------------------------- /dmb/data/datasets/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .flow import flow_output_evaluation_in_pandas 2 | from .stereo import disp_output_evaluation_in_pandas 3 | 4 | def output_evaluation_in_pandas(output_dict, task='stereo'): 5 | if task == 'stereo': 6 | return disp_output_evaluation_in_pandas(output_dict) 7 | elif task == 'flow': 8 | return flow_output_evaluation_in_pandas(output_dict) 9 | else: 10 | raise NotImplementedError 11 | 12 | 13 | from .flow import remove_padding as flow_remove_padding 14 | from .stereo import remove_padding as disp_remove_padding 15 | 16 | # NOTE: despite its name, this helper dispatches the task-specific remove_padding, not an error metric 17 | def calc_error(batch, size, task='stereo'): 18 | if task == 'stereo': 19 | return disp_remove_padding(batch, size) 20 | elif task == 'flow': 21 | return flow_remove_padding(batch, size) 22 | else: 23 | raise NotImplementedError 24 | -------------------------------------------------------------------------------- /dmb/data/datasets/evaluation/flow/__init__.py: -------------------------------------------------------------------------------- 1 | from .pixel_error import calc_error 2 | from .eval import do_evaluation, remove_padding 3 | from .eval_hooks import DistFlowEvalHook, flow_output_evaluation_in_pandas -------------------------------------------------------------------------------- /dmb/data/datasets/evaluation/flow/eval.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from collections import abc as container_abcs 3 | 4 | import torch 5 | 6 | from dmb.data.datasets.evaluation.flow.pixel_error import calc_error 7 | 8 | 9 | def remove_padding(batch, size): 10 | """ 11 | Usually, the SceneFlow image size is [540, 960], and we often pad it to [544, 960] for evaluation; 12 | for KITTI, the image size is padded to [384, 1248]. 13 | Here, we remove the padding from the estimated tensor, such as a flow map. 14 | Args: 15 | batch (torch.Tensor): in [BatchSize, Channel, Height, Width] layout 16 | size (list, tuple): the last two dimensions are the desired [Height, Width] 17 | """ 18 | error_msg = "batch must contain tensors, dicts or lists; found {}" 19 | if isinstance(batch, torch.Tensor): 20 | # Crop batch to the desired size 21 | # For flow, the image is padded on all sides, keeping the content centered 22 | assert batch.shape[-2] >= size[-2] and batch.shape[-1] >= size[-1] 23 | 24 | pad_top = (batch.shape[-2] - size[-2])//2 25 | pad_left = (batch.shape[-1] - size[-1])//2 26 | # crop an exact [Height, Width] window starting at the padding offsets 27 | batch = batch[:, :, pad_top:pad_top + size[-2], pad_left:pad_left + size[-1]] 28 | 29 | return batch 30 | elif isinstance(batch, container_abcs.Mapping): 31 | return {key: remove_padding(batch[key], size) for key in batch} 32 | elif isinstance(batch, container_abcs.Sequence): 33 | return [remove_padding(samples, size) for samples in batch] 34 | 35 | raise TypeError((error_msg.format(type(batch)))) 36 | 37 | 38 | def do_evaluation(est_flow, gt_flow, sparse=False): 39 | """ 40 | Do pixel error evaluation. (See KITTI evaluation protocols for details.)
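EPE is the mean Euclidean (end-point) distance between estimated and ground-truth flow vectors over all valid pixels; the k-px metrics report the percentage of valid pixels whose end-point error exceeds k pixels.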
41 | Args: 42 | est_flow, (Tensor): estimated flow map, in [BatchSize, 2, Height, Width] or 43 | [2, Height, Width] layout 44 | gt_flow, (Tensor): ground truth flow map, in [BatchSize, 2, Height, Width] or 45 | [2, Height, Width]layout 46 | sparse, (bool): whether the given flow is sparse, default False 47 | 48 | Returns: 49 | error_dict (dict): the error of 1px, 2px, 3px, 5px, in percent, 50 | range [0,100] and average error epe 51 | """ 52 | error_dict = {} 53 | if est_flow is None: 54 | warnings.warn('Estimated flow map is None') 55 | return error_dict 56 | if gt_flow is None: 57 | warnings.warn('Reference ground truth flow map is None') 58 | return error_dict 59 | 60 | if torch.is_tensor(est_flow): 61 | est_flow = est_flow.clone().cpu() 62 | 63 | if torch.is_tensor(gt_flow): 64 | gt_flow = gt_flow.clone().cpu() 65 | 66 | error_dict = calc_error(est_flow, gt_flow, sparse=sparse) 67 | 68 | return error_dict 69 | 70 | 71 | -------------------------------------------------------------------------------- /dmb/data/datasets/evaluation/flow/pixel_error.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | 5 | def zero_mask(input, eps=1e-12): 6 | mask = abs(input) < eps 7 | return mask 8 | 9 | def calc_error(est_flow=None, gt_flow=None, sparse=False): 10 | """ 11 | Args: 12 | est_flow (Tensor): in [BatchSize, 2, Height, Width] or 13 | [2, Height, Width] layout 14 | gt_flow (Tensor): in [BatchSize, 2, Height, Width] or 15 | [2, Height, Width] layout 16 | sparse, (bool): whether the given flow is sparse, default False 17 | Output: 18 | dict: the error of 1px, 2px, 3px, 5px, in percent, 19 | range [0,100] and average error epe 20 | """ 21 | error1 = torch.Tensor([0.]) 22 | error2 = torch.Tensor([0.]) 23 | error3 = torch.Tensor([0.]) 24 | error5 = torch.Tensor([0.]) 25 | epe = torch.Tensor([0.]) 26 | 27 | if (not torch.is_tensor(est_flow)) or (not torch.is_tensor(gt_flow)): 28 | return { 29 | '1px': error1 * 100, 30 | '2px': error2 * 100, 31 | '3px': error3 * 100, 32 | '5px': error5 * 100, 33 | 'epe': epe 34 | } 35 | 36 | assert torch.is_tensor(est_flow) and torch.is_tensor(gt_flow) 37 | assert est_flow.shape == gt_flow.shape 38 | 39 | est_flow = est_flow.clone().cpu() 40 | gt_flow = gt_flow.clone().cpu() 41 | if len(gt_flow.shape) == 3: 42 | gt_flow = gt_flow.unsqueeze(0) 43 | est_flow = est_flow.unsqueeze(0) 44 | 45 | assert gt_flow.shape[1] == 2, "flow should have horizontal and vertical dimension, " \ 46 | "but got {}".format(gt_flow.shape[1]) 47 | 48 | # [B, 1, H, W] 49 | gt_u, gt_v = gt_flow[:, 0:1, :, :], gt_flow[:, 1:2, :, :] 50 | est_u, est_v = est_flow[:, 0:1, :, :], est_flow[:, 1:2, :, :] 51 | 52 | # get valid mask 53 | # [B, 1, H, W] 54 | mask = torch.ones(gt_u.shape, dtype=torch.bool) 55 | if sparse: 56 | mask = mask & (~(zero_mask(gt_u) & zero_mask(gt_v))) 57 | mask = mask & (~(torch.isnan(gt_u) | torch.isnan(gt_v))) 58 | mask.detach_() 59 | if abs(mask.float().sum()) < 1.0: 60 | return { 61 | '1px': error1 * 100, 62 | '2px': error2 * 100, 63 | '3px': error3 * 100, 64 | '5px': error5 * 100, 65 | 'epe': epe 66 | } 67 | 68 | gt_u = gt_u[mask] 69 | gt_v = gt_v[mask] 70 | est_u = est_u[mask] 71 | est_v = est_v[mask] 72 | 73 | abs_error = torch.sqrt((gt_u - est_u)**2 + (gt_v - est_v)**2) 74 | total_num = mask.float().sum() 75 | 76 | error1 = torch.sum(torch.gt(abs_error, 1).float()) / total_num 77 | error2 = torch.sum(torch.gt(abs_error, 2).float()) / total_num 78 | error3 = torch.sum(torch.gt(abs_error, 
3).float()) / total_num 79 | error5 = torch.sum(torch.gt(abs_error, 5).float()) / total_num 80 | epe = abs_error.float().mean() 81 | 82 | return { 83 | '1px': error1 * 100, 84 | '2px': error2 * 100, 85 | '3px': error3 * 100, 86 | '5px': error5 * 100, 87 | 'epe': epe 88 | } 89 | -------------------------------------------------------------------------------- /dmb/data/datasets/evaluation/stereo/__init__.py: -------------------------------------------------------------------------------- 1 | from .pixel_error import calc_error 2 | from .eval import do_evaluation, do_occlusion_evaluation, remove_padding 3 | from .eval_hooks import DistStereoEvalHook, disp_output_evaluation_in_pandas 4 | -------------------------------------------------------------------------------- /dmb/data/datasets/evaluation/stereo/pixel_error.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | 5 | 6 | def calc_error(est_disp=None, gt_disp=None, lb=None, ub=None): 7 | """ 8 | Args: 9 | est_disp (Tensor): in [BatchSize, Channel, Height, Width] or 10 | [BatchSize, Height, Width] or [Height, Width] layout 11 | gt_disp (Tensor): in [BatchSize, Channel, Height, Width] or 12 | [BatchSize, Height, Width] or [Height, Width] layout 13 | lb (scalar): the lower bound of valid disparity; ground-truth values not greater than lb are masked out 14 | ub (scalar): the upper bound of valid disparity; ground-truth values not less than ub are masked out 15 | Output: 16 | dict: the error of 1px, 2px, 3px, 5px, in percent, 17 | range [0,100] and average error epe 18 | """ 19 | error1 = torch.Tensor([0.]) 20 | error2 = torch.Tensor([0.]) 21 | error3 = torch.Tensor([0.]) 22 | error5 = torch.Tensor([0.]) 23 | epe = torch.Tensor([0.]) 24 | 25 | if (not torch.is_tensor(est_disp)) or (not torch.is_tensor(gt_disp)): 26 | return { 27 | '1px': error1 * 100, 28 | '2px': error2 * 100, 29 | '3px': error3 * 100, 30 | '5px': error5 * 100, 31 | 'epe': epe 32 | } 33 | 34 | assert torch.is_tensor(est_disp) and torch.is_tensor(gt_disp) 35 | assert est_disp.shape == gt_disp.shape 36 | 37 | est_disp = est_disp.clone().cpu() 38 | gt_disp = gt_disp.clone().cpu() 39 | 40 | mask = torch.ones(gt_disp.shape, dtype=torch.bool) 41 | if lb is not None: 42 | mask = mask & (gt_disp > lb) 43 | if ub is not None: 44 | mask = mask & (gt_disp < ub) 45 | mask.detach_() 46 | if abs(mask.float().sum()) < 1.0: 47 | return { 48 | '1px': error1 * 100, 49 | '2px': error2 * 100, 50 | '3px': error3 * 100, 51 | '5px': error5 * 100, 52 | 'epe': epe 53 | } 54 | 55 | gt_disp = gt_disp[mask] 56 | est_disp = est_disp[mask] 57 | 58 | abs_error = torch.abs(gt_disp - est_disp) 59 | total_num = mask.float().sum() 60 | 61 | error1 = torch.sum(torch.gt(abs_error, 1).float()) / total_num 62 | error2 = torch.sum(torch.gt(abs_error, 2).float()) / total_num 63 | error3 = torch.sum(torch.gt(abs_error, 3).float()) / total_num 64 | error5 = torch.sum(torch.gt(abs_error, 5).float()) / total_num 65 | epe = abs_error.float().mean() 66 | 67 | return { 68 | '1px': error1 * 100, 69 | '2px': error2 * 100, 70 | '3px': error3 * 100, 71 | '5px': error5 * 100, 72 | 'epe': epe 73 | } 74 | -------------------------------------------------------------------------------- /dmb/data/datasets/flow/__init__.py: -------------------------------------------------------------------------------- 1 | from .flying_chairs import FlyingChairsDataset 2 | 3 | from .builder import build_flow_dataset -------------------------------------------------------------------------------- /dmb/data/datasets/flow/base.py:
-------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import numpy as np 3 | import yaml 4 | 5 | from torch.utils.data import Dataset 6 | 7 | 8 | class FlowDatasetBase(Dataset): 9 | def __init__(self, annFile, root, transform): 10 | self.annFile = annFile 11 | self.root = root 12 | self.data_list = self.annLoader() 13 | 14 | # transforms for data augmentation 15 | self.transform = transform 16 | 17 | self.flag = np.zeros(len(self.data_list), dtype=np.int64) 18 | 19 | def annLoader(self): 20 | data_list = [] 21 | with open(file=self.annFile, mode='r') as fp: 22 | data_list.extend(yaml.load(fp, Loader=yaml.BaseLoader)) 23 | return data_list 24 | 25 | 26 | def Loader(self, item): 27 | raise NotImplementedError 28 | 29 | def __getitem__(self, idx): 30 | item = self.data_list[idx] 31 | sample = self.Loader(item) 32 | 33 | if self.transform is not None: 34 | sample = self.transform(sample) 35 | 36 | return sample 37 | 38 | def __len__(self): 39 | return len(self.data_list) 40 | 41 | def __repr__(self): 42 | repr_str = '{}\n'.format(self.__class__.__name__) 43 | repr_str += ' ' * 4 + 'Root: {}\n'.format(self.root) 44 | repr_str += ' ' * 4 + 'annFile: {}\n'.format(self.annFile) 45 | repr_str += ' ' * 4 + 'Length: {}\n'.format(self.__len__()) 46 | 47 | return repr_str 48 | 49 | @property 50 | def name(self): 51 | raise NotImplementedError 52 | -------------------------------------------------------------------------------- /dmb/data/datasets/flow/builder.py: -------------------------------------------------------------------------------- 1 | from dmb.data.transforms import Compose 2 | from dmb.data.transforms import flow_trans as T 3 | 4 | from dmb.data.datasets.flow import FlyingChairsDataset 5 | 6 | 7 | def build_transforms(cfg, type, is_train): 8 | input_shape = cfg.data[type].input_shape 9 | mean = cfg.data[type].mean 10 | std = cfg.data[type].std 11 | 12 | if is_train: 13 | transform = Compose( 14 | [ 15 | # T.RandomTranslate(10), 16 | # T.RandomRotate(angle=5, diff_angle=10), 17 | T.ToTensor(), 18 | T.RandomCrop(input_shape), 19 | # T.RandomHorizontalFlip(), 20 | # T.RandomVerticalFlip(), 21 | T.Normalize(mean, std), 22 | # T.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4), 23 | ] 24 | ) 25 | else: 26 | transform = Compose( 27 | [ 28 | T.ToTensor(), 29 | T.CenterCat(input_shape), 30 | T.Normalize(mean, std), 31 | ] 32 | ) 33 | 34 | return transform 35 | 36 | 37 | def build_flow_dataset(cfg, type): 38 | if type not in cfg.data: 39 | return None 40 | 41 | data_root = cfg.data[type].data_root 42 | data_type = cfg.data[type].type 43 | annFile = cfg.data[type].annfile 44 | 45 | is_train = True if type == 'train' else False 46 | transforms = build_transforms(cfg, type, is_train=is_train) 47 | 48 | if 'FlyingChairs' in data_type: 49 | dataset = FlyingChairsDataset(annFile, data_root, transforms) 50 | else: 51 | raise ValueError("invalid data type: {}".format(data_type)) 52 | 53 | return dataset 54 | -------------------------------------------------------------------------------- /dmb/data/datasets/flow/flying_chairs/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import FlyingChairsDataset 2 | 3 | __all__ = ["FlyingChairsDataset"] 4 | -------------------------------------------------------------------------------- /dmb/data/datasets/flow/flying_chairs/base.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 
import numpy as np 3 | from imageio import imread 4 | 5 | from dmb.data.datasets.flow.base import FlowDatasetBase 6 | from dmb.data.datasets.utils import load_flying_chairs_flow 7 | 8 | 9 | class FlyingChairsDataset(FlowDatasetBase): 10 | 11 | def __init__(self, annFile, root, transform=None): 12 | super(FlyingChairsDataset, self).__init__(annFile, root, transform) 13 | 14 | def Loader(self, item): 15 | # only take first three RGB channel no matter in RGB or RGBA format 16 | leftImage = imread( 17 | osp.join(self.root, item['left_image_path']) 18 | ).transpose(2, 0, 1).astype(np.float32)[:3] 19 | rightImage = imread( 20 | osp.join(self.root, item['right_image_path']) 21 | ).transpose(2, 0, 1).astype(np.float32)[:3] 22 | 23 | h, w = leftImage.shape[1], leftImage.shape[2] 24 | original_size = (h, w) 25 | 26 | if 'flow_path' in item.keys() and item['flow_path'] is not None: 27 | flow = load_flying_chairs_flow( 28 | osp.join(self.root, item['flow_path']) 29 | ).transpose(2, 0, 1).astype(np.float32) 30 | 31 | else: 32 | flow = None 33 | 34 | 35 | return { 36 | 'leftImage': leftImage, 37 | 'rightImage': rightImage, 38 | 'flow': flow, 39 | 'original_size': original_size, 40 | } 41 | 42 | @property 43 | def name(self): 44 | return 'FlyingChairs' 45 | -------------------------------------------------------------------------------- /dmb/data/datasets/stereo/__init__.py: -------------------------------------------------------------------------------- 1 | from .scene_flow import SceneFlowDataset 2 | from .kitti import Kitti2012Dataset, Kitti2015Dataset 3 | 4 | from .builder import build_stereo_dataset 5 | 6 | __all__ = [ 7 | "build_stereo_dataset", "SceneFlowDataset", 8 | "Kitti2015Dataset", "Kitti2012Dataset" 9 | ] 10 | -------------------------------------------------------------------------------- /dmb/data/datasets/stereo/base.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os.path as osp 3 | import numpy as np 4 | 5 | from torch.utils.data import Dataset 6 | 7 | 8 | class StereoDatasetBase(Dataset): 9 | def __init__(self, annFile, root, transform=None): 10 | self.annFile = annFile 11 | self.root = root 12 | self.data_list = self.annLoader() 13 | 14 | # transforms for data augmentation 15 | self.transform = transform 16 | 17 | self.flag = np.zeros(len(self.data_list), dtype=np.int64) 18 | 19 | def annLoader(self): 20 | data_list = [] 21 | with open(file=self.annFile, mode='r') as fp: 22 | data_list.extend(json.load(fp)) 23 | return data_list 24 | 25 | def Loader(self, item): 26 | raise NotImplementedError 27 | 28 | def __getitem__(self, idx): 29 | item = self.data_list[idx] 30 | sample = self.Loader(item) 31 | 32 | if self.transform is not None: 33 | sample = self.transform(sample) 34 | 35 | return sample 36 | 37 | def __len__(self): 38 | return len(self.data_list) 39 | 40 | def __repr__(self): 41 | repr_str = '{}\n'.format(self.__class__.__name__) 42 | repr_str += ' ' * 4 + 'Root: {}\n'.format(self.root) 43 | repr_str += ' ' * 4 + 'annFile: {}\n'.format(self.annFile) 44 | repr_str += ' ' * 4 + 'Length: {}\n'.format(self.__len__()) 45 | 46 | return repr_str 47 | 48 | @property 49 | def name(self): 50 | raise NotImplementedError 51 | -------------------------------------------------------------------------------- /dmb/data/datasets/stereo/builder.py: -------------------------------------------------------------------------------- 1 | from dmb.data.transforms import Compose 2 | from dmb.data.transforms import stereo_trans as T 3 | 4 | 
from dmb.data.datasets.stereo.scene_flow import SceneFlowDataset 5 | from dmb.data.datasets.stereo.kitti import Kitti2012Dataset, Kitti2015Dataset 6 | 7 | 8 | def build_transforms(cfg, type, is_train): 9 | input_shape = cfg.data[type].input_shape 10 | mean = cfg.data[type].mean 11 | std = cfg.data[type].std 12 | 13 | if is_train: 14 | transform = Compose( 15 | [ 16 | T.ToTensor(), 17 | T.RandomCrop(input_shape), 18 | T.Normalize(mean, std), 19 | ] 20 | ) 21 | else: 22 | transform = Compose( 23 | [ 24 | T.ToTensor(), 25 | T.StereoPad(input_shape), 26 | T.Normalize(mean, std), 27 | ] 28 | ) 29 | 30 | return transform 31 | 32 | 33 | def build_stereo_dataset(cfg, type): 34 | if type not in cfg.data: 35 | return None 36 | 37 | data_root = cfg.data[type].data_root 38 | data_type = cfg.data[type].type 39 | annFile = cfg.data[type].annfile 40 | 41 | is_train = True if type == 'train' else False 42 | transforms = build_transforms(cfg, type, is_train=is_train) 43 | 44 | if 'SceneFlow' in data_type: 45 | dataset = SceneFlowDataset(annFile, data_root, transforms) 46 | elif 'KITTI' in data_type: 47 | if '2012' in data_type: 48 | dataset = Kitti2012Dataset(annFile, data_root, transforms) 49 | elif '2015' in data_type: 50 | dataset = Kitti2015Dataset(annFile, data_root, transforms) 51 | else: 52 | raise ValueError("invalid data type: {}".format(data_type)) 53 | else: 54 | raise ValueError("invalid data type: {}".format(data_type)) 55 | 56 | return dataset 57 | -------------------------------------------------------------------------------- /dmb/data/datasets/stereo/kitti/__init__.py: -------------------------------------------------------------------------------- 1 | from .kitti_2012 import Kitti2012Dataset 2 | from .kitti_2015 import Kitti2015Dataset 3 | 4 | __all__ = ["Kitti2012Dataset", "Kitti2015Dataset"] 5 | -------------------------------------------------------------------------------- /dmb/data/datasets/stereo/kitti/base.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | import numpy as np 4 | from imageio import imread 5 | 6 | from dmb.data.datasets.stereo.base import StereoDatasetBase 7 | 8 | 9 | class KittiDatasetBase(StereoDatasetBase): 10 | 11 | def __init__(self, annFile, root, transform=None): 12 | super(KittiDatasetBase, self).__init__(annFile, root, transform) 13 | 14 | def Loader(self, item): 15 | # only take first three RGB channel no matter in RGB or RGBA format 16 | leftImage = imread( 17 | osp.join(self.root, item['left_image_path']) 18 | ).transpose(2, 0, 1).astype(np.float32)[:3] 19 | rightImage = imread( 20 | osp.join(self.root, item['right_image_path']) 21 | ).transpose(2, 0, 1).astype(np.float32)[:3] 22 | 23 | h, w = leftImage.shape[1], leftImage.shape[2] 24 | original_size = (h, w) 25 | 26 | sample = { 27 | 'leftImage': leftImage, 28 | 'rightImage': rightImage, 29 | 'original_size': original_size, 30 | } 31 | 32 | 33 | if 'left_disp_map_path' in item.keys() and item['left_disp_map_path'] is not None: 34 | leftDisp = imread( 35 | osp.join(self.root, item['left_disp_map_path']) 36 | ).astype(np.float32) / 256.0 37 | leftDisp = leftDisp[np.newaxis, ...] 38 | 39 | sample.update(leftDisp=leftDisp) 40 | 41 | if 'right_disp_map_path' in item.keys() and item['right_disp_map_path'] is not None: 42 | rightDisp = imread( 43 | osp.join(self.root, item['right_disp_map_path']) 44 | ).astype(np.float32) / 256.0 45 | rightDisp = rightDisp[np.newaxis, ...] 
46 | 47 | sample.update(rightDisp=rightDisp) 48 | 49 | return sample 50 | 51 | @property 52 | def name(self): 53 | return 'KITTI' 54 | -------------------------------------------------------------------------------- /dmb/data/datasets/stereo/kitti/kitti_2012.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import numpy as np 3 | from imageio import imread 4 | 5 | from dmb.data.datasets.stereo.kitti.base import KittiDatasetBase 6 | 7 | 8 | class Kitti2012Dataset(KittiDatasetBase): 9 | 10 | def __init__(self, annFile, root, transform=None): 11 | super(Kitti2012Dataset, self).__init__(annFile, root, transform) 12 | 13 | @property 14 | def name(self): 15 | return 'KITTI-2012' 16 | -------------------------------------------------------------------------------- /dmb/data/datasets/stereo/kitti/kitti_2015.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import numpy as np 3 | from imageio import imread 4 | 5 | from dmb.data.datasets.stereo.kitti.base import KittiDatasetBase 6 | 7 | 8 | class Kitti2015Dataset(KittiDatasetBase): 9 | 10 | def __init__(self, annFile, root, transform=None): 11 | super(Kitti2015Dataset, self).__init__(annFile, root, transform) 12 | 13 | @property 14 | def name(self): 15 | return 'KITTI-2015' 16 | -------------------------------------------------------------------------------- /dmb/data/datasets/stereo/scene_flow/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import SceneFlowDataset 2 | 3 | __all__ = ["SceneFlowDataset"] 4 | -------------------------------------------------------------------------------- /dmb/data/datasets/stereo/scene_flow/base.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import numpy as np 3 | from imageio import imread 4 | 5 | from dmb.data.datasets.stereo.base import StereoDatasetBase 6 | from dmb.data.datasets.utils import load_scene_flow_disp 7 | 8 | 9 | class SceneFlowDataset(StereoDatasetBase): 10 | 11 | def __init__(self, annFile, root, transform=None): 12 | super(SceneFlowDataset, self).__init__(annFile, root, transform) 13 | 14 | def Loader(self, item): 15 | # only take first three RGB channel no matter in RGB or RGBA format 16 | leftImage = imread( 17 | osp.join(self.root, item['left_image_path']) 18 | ).transpose(2, 0, 1).astype(np.float32)[:3] 19 | rightImage = imread( 20 | osp.join(self.root, item['right_image_path']) 21 | ).transpose(2, 0, 1).astype(np.float32)[:3] 22 | 23 | h, w = leftImage.shape[1], leftImage.shape[2] 24 | original_size = (h, w) 25 | 26 | if 'left_disp_map_path' in item.keys() and item['left_disp_map_path'] is not None: 27 | leftDisp = load_scene_flow_disp( 28 | osp.join(self.root, item['left_disp_map_path']) 29 | ) 30 | leftDisp = leftDisp[np.newaxis, ...] 31 | 32 | else: 33 | leftDisp = None 34 | 35 | if 'right_disp_map_path' in item.keys() and item['right_disp_map_path'] is not None: 36 | rightDisp = load_scene_flow_disp( 37 | osp.join(self.root, item['right_disp_map_path']) 38 | ) 39 | rightDisp = rightDisp[np.newaxis, ...] 
40 | 41 | else: 42 | rightDisp = None 43 | 44 | return { 45 | 'leftImage': leftImage, 46 | 'rightImage': rightImage, 47 | 'leftDisp': leftDisp, 48 | 'rightDisp': rightDisp, 49 | 'original_size': original_size, 50 | } 51 | 52 | @property 53 | def name(self): 54 | return 'SceneFlow' 55 | -------------------------------------------------------------------------------- /dmb/data/datasets/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .load_disp import load_scene_flow_disp 2 | from .load_flow import load_flying_chairs_flow, load_flying_things_flow, load_kitti_flow, write_flying_chairs_flow 3 | -------------------------------------------------------------------------------- /dmb/data/datasets/utils/load_disp.py: -------------------------------------------------------------------------------- 1 | import re 2 | import numpy as np 3 | 4 | 5 | def load_pfm(file_path): 6 | """ 7 | Load an image stored in PFM format. 8 | Args: 9 | file_path (string): absolute file path 10 | Returns: 11 | data (numpy.array): data of image in (Height, Width[, 3]) layout 12 | scale (float): scale of image 13 | """ 14 | with open(file_path, encoding="ISO-8859-1") as fp: 15 | color = None 16 | width = None 17 | height = None 18 | scale = None 19 | endian = None 20 | 21 | # load file header and grab channels: 'PF' means 3 channels, 'Pf' means 1 channel (gray scale) 22 | header = fp.readline().rstrip() 23 | if header == 'PF': 24 | color = True 25 | elif header == 'Pf': 26 | color = False 27 | else: 28 | raise Exception('Not a PFM file.') 29 | 30 | # grab image dimensions 31 | dim_match = re.match(r'^(\d+)\s(\d+)\s$', fp.readline()) 32 | if dim_match: 33 | width, height = map(int, dim_match.groups()) 34 | else: 35 | raise Exception('Malformed PFM header.') 36 | 37 | # grab image scale 38 | scale = float(fp.readline().rstrip()) 39 | if scale < 0: # little-endian 40 | endian = '<' 41 | scale = -scale 42 | else: 43 | endian = '>' # big-endian 44 | 45 | # grab image data 46 | data = np.fromfile(fp, endian + 'f') 47 | shape = (height, width, 3) if color else (height, width) 48 | 49 | # reshape data to [Height, Width, Channels] 50 | data = np.reshape(data, shape) 51 | data = np.flipud(data) 52 | 53 | return data, scale 54 | 55 | 56 | # load utils 57 | def load_scene_flow_disp(img_path): 58 | """Load a Scene Flow disparity image. 59 | Args: 60 | img_path (string): path to the .pfm disparity file 61 | Returns: 62 | disp_img (numpy.array): disparity map in (Height, Width) layout 63 | """ 64 | assert img_path.endswith('.pfm'), "scene flow disparity image must end with .pfm, " \ 65 | "but got {}".format(img_path) 66 | 67 | disp_img, __ = load_pfm(img_path) 68 | 69 | return disp_img 70 | -------------------------------------------------------------------------------- /dmb/data/loaders/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_data_loader 2 | -------------------------------------------------------------------------------- /dmb/data/loaders/builder.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | from torch.utils.data import DataLoader 4 | 5 | from mmcv.parallel import collate 6 | from mmcv.runner import get_dist_info 7 | 8 | from .samplers import GroupSampler, DistributedGroupSampler, DistributedSampler 9 | 10 | # https://github.com/pytorch/pytorch/issues/973 11 | import resource 12 | 13 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 14 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 15 | 16 | 17 | def build_data_loader(
18 | dataset, 19 | imgs_per_gpu, 20 | workers_per_gpu, 21 | num_gpus=1, 22 | dist=True, 23 | **kwargs 24 | ): 25 | # pop, not get: DataLoader must not receive both a sampler and shuffle=True 26 | shuffle = kwargs.pop('shuffle', True) 27 | if dist: 28 | rank, world_size = get_dist_info() 29 | if shuffle: 30 | sampler = DistributedGroupSampler( 31 | dataset, imgs_per_gpu, world_size, rank 32 | ) 33 | else: 34 | sampler = DistributedSampler( 35 | dataset, world_size, rank, shuffle=False 36 | ) 37 | batch_size = imgs_per_gpu 38 | num_workers = workers_per_gpu 39 | else: 40 | sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None 41 | batch_size = num_gpus * imgs_per_gpu 42 | num_workers = num_gpus * workers_per_gpu 43 | 44 | data_loader = DataLoader( 45 | dataset, 46 | batch_size=batch_size, 47 | sampler=sampler, 48 | num_workers=num_workers, 49 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu), 50 | pin_memory=False, 51 | **kwargs) 52 | 53 | return data_loader 54 | -------------------------------------------------------------------------------- /dmb/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .transforms import Compose 2 | -------------------------------------------------------------------------------- /dmb/data/transforms/builder.py: -------------------------------------------------------------------------------- 1 | from . import transforms as T 2 | 3 | 4 | def build_transforms(cfg, is_train=True): 5 | return None 6 | -------------------------------------------------------------------------------- /dmb/data/transforms/stereo_trans.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numbers 3 | import numpy as np 4 | 5 | import torch 6 | from torch.nn.functional import pad 7 | import torchvision.transforms.functional as F 8 | 9 | 10 | class ToTensor(object): 11 | """ 12 | Convert numpy.ndarray to torch.FloatTensor, in [Channels, Height, Width] layout 13 | """ 14 | def __call__(self, sample): 15 | for k in sample.keys(): 16 | if sample[k] is not None and isinstance(sample[k], np.ndarray): 17 | sample[k] = torch.from_numpy(sample[k].copy()) 18 | return sample 19 | 20 | 21 | class CenterCrop(object): 22 | """Crops the given image at the central location to have a region of 23 | the given size. size can be a tuple (target_height, target_width) 24 | or an integer, in which case the target will be of a square shape (size, size) 25 | """ 26 | 27 | def __init__(self, size): 28 | if isinstance(size, numbers.Number): 29 | self.size = (int(size), int(size)) 30 | else: 31 | self.size = size 32 | 33 | def __call__(self, sample): 34 | 35 | h, w = sample['leftImage'].shape[-2:] 36 | th, tw = self.size 37 | if w == tw and h == th: 38 | return sample 39 | 40 | x1 = (w - tw) // 2 41 | y1 = (h - th) // 2 42 | 43 | for k in sample.keys(): 44 | if sample[k] is not None and isinstance(sample[k], (np.ndarray, torch.Tensor)): 45 | sample[k] = sample[k][:, y1: y1 + th, x1: x1 + tw] 46 | return sample 47 | 48 | 49 | class RandomCrop(object): 50 | """Crops the given image at a random location to have a region of 51 | the given size.
size can be a tuple (target_height, target_width) 52 | or an integer, in which case the target will be of a square shape (size, size) 53 | """ 54 | 55 | def __init__(self, size): 56 | if isinstance(size, numbers.Number): 57 | self.size = (int(size), int(size)) 58 | else: 59 | self.size = size 60 | 61 | def __call__(self, sample): 62 | 63 | h, w = sample['leftImage'].shape[-2:] 64 | th, tw = self.size 65 | if w == tw and h == th: 66 | return sample 67 | 68 | x1 = random.randint(0, w - tw) 69 | y1 = random.randint(0, h - th) 70 | 71 | for k in sample.keys(): 72 | if sample[k] is not None and isinstance(sample[k], (np.ndarray, torch.Tensor)): 73 | sample[k] = sample[k][:, y1: y1 + th, x1: x1 + tw] 74 | return sample 75 | 76 | 77 | class Normalize(object): """Normalize 'leftImage' and 'rightImage' channel-wise with the given mean and std.""" 78 | def __init__(self, mean, std): 79 | self.mean = mean 80 | self.std = std 81 | 82 | def __call__(self, sample): 83 | sample['leftImage'] = F.normalize( 84 | sample['leftImage'], mean=self.mean, std=self.std 85 | ) 86 | sample['rightImage'] = F.normalize( 87 | sample['rightImage'], mean=self.mean, std=self.std 88 | ) 89 | return sample 90 | 91 | 92 | class StereoPad(object): """Zero-pad 'leftImage' and 'rightImage' on the top and right up to the given size.""" 93 | def __init__(self, size): 94 | if isinstance(size, numbers.Number): 95 | self.size = (int(size), int(size)) 96 | else: 97 | self.size = size 98 | 99 | def __call__(self, sample): 100 | h, w = sample['leftImage'].shape[-2:] 101 | th, tw = self.size 102 | if w == tw and h == th: 103 | return sample 104 | 105 | pad_left = 0 106 | pad_right = tw - w 107 | pad_top = th - h 108 | pad_bottom = 0 109 | 110 | sample['leftImage'] = pad( 111 | sample['leftImage'], [pad_left, pad_right, pad_top, pad_bottom], 112 | mode='constant', value=0 113 | ) 114 | sample['rightImage'] = pad( 115 | sample['rightImage'], [pad_left, pad_right, pad_top, pad_bottom], 116 | mode='constant', value=0 117 | ) 118 | 119 | return sample 120 | -------------------------------------------------------------------------------- /dmb/data/transforms/transforms.py: -------------------------------------------------------------------------------- 1 | class Compose(object): 2 | def __init__(self, transforms): 3 | self.transforms = transforms 4 | 5 | def __call__(self, sample): 6 | for t in self.transforms: 7 | sample = t(sample) 8 | return sample 9 | 10 | def __repr__(self): 11 | format_string = self.__class__.__name__ + "(" 12 | for t in self.transforms: 13 | format_string += "\n" 14 | format_string += " {0}".format(t) 15 | format_string += "\n)" 16 | return format_string 17 | -------------------------------------------------------------------------------- /dmb/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .flow.models import _META_ARCHITECTURES as _FLOW_META_ARCHITECTURES 2 | from .stereo.models import _META_ARCHITECTURES as _STEREO_META_ARCHITECTURES 3 | 4 | _META_ARCHITECTURES = dict() 5 | 6 | _META_ARCHITECTURES.update(_FLOW_META_ARCHITECTURES) 7 | _META_ARCHITECTURES.update(_STEREO_META_ARCHITECTURES) 8 | 9 | 10 | def build_model(cfg): 11 | meta_arch = _META_ARCHITECTURES[cfg.model.meta_architecture] 12 | return meta_arch(cfg) 13 | -------------------------------------------------------------------------------- /dmb/modeling/flow/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import build_flow_model --------------------------------------------------------------------------------
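# NOTE (editor): a self-contained sketch of chaining the stereo transforms
# defined in dmb/data/transforms above via Compose; the shapes and the
# normalization statistics are illustrative assumptions, not values shipped
# with this repository.

import numpy as np
from dmb.data.transforms import Compose
from dmb.data.transforms.stereo_trans import ToTensor, RandomCrop, Normalize

pipeline = Compose([
    ToTensor(),                 # numpy.ndarray -> torch.Tensor, [C, H, W]
    RandomCrop((240, 480)),     # crops every tensor in the sample alike
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

sample = {
    'leftImage': np.random.rand(3, 256, 512).astype(np.float32),
    'rightImage': np.random.rand(3, 256, 512).astype(np.float32),
    'leftDisp': np.random.rand(1, 256, 512).astype(np.float32),
    'rightDisp': None,          # None entries pass through untouched
}
out = pipeline(sample)          # out['leftImage'].shape == (3, 240, 480)

/dmb/modeling/flow/models/__init__.py: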
-------------------------------------------------------------------------------- 1 | 2 | _META_ARCHITECTURES = { 3 | 4 | } 5 | 6 | 7 | def build_flow_model(cfg): 8 | meta_arch = _META_ARCHITECTURES[cfg.model.meta_architecture] 9 | return meta_arch(cfg) -------------------------------------------------------------------------------- /dmb/modeling/stereo/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import build_stereo_model 2 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/backbones/GCNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv_bn_relu, BasicBlock 6 | 7 | 8 | class GCNetBackbone(nn.Module): 9 | """ 10 | Backbone proposed in GCNet. 11 | Args: 12 | in_planes (int): the channels of input 13 | batch_norm (bool): whether use batch normalization layer, default True 14 | Inputs: 15 | l_img (Tensor): left image, in [BatchSize, 3, Height, Width] 16 | r_img (Tensor): right image, in [BatchSize, 3, Height, Width] 17 | Outputs: 18 | l_fms (Tensor): left image feature maps, in [BatchSize, 32, Height//2, Width//2] 19 | right (Tensor): right image feature maps, in [BatchSize, 32, Height//2, Width//2] 20 | """ 21 | 22 | def __init__(self, in_planes, batch_norm=True): 23 | super(GCNetBackbone, self).__init__() 24 | self.in_planes = in_planes 25 | 26 | self.backbone = nn.Sequential( 27 | conv_bn_relu(batch_norm, self.in_planes, 32, 5, 2, 2), 28 | BasicBlock(batch_norm, 32, 32, 1, None, 1, 1), 29 | BasicBlock(batch_norm, 32, 32, 1, None, 1, 1), 30 | BasicBlock(batch_norm, 32, 32, 1, None, 1, 1), 31 | BasicBlock(batch_norm, 32, 32, 1, None, 1, 1), 32 | BasicBlock(batch_norm, 32, 32, 1, None, 1, 1), 33 | BasicBlock(batch_norm, 32, 32, 1, None, 1, 1), 34 | BasicBlock(batch_norm, 32, 32, 1, None, 1, 1), 35 | BasicBlock(batch_norm, 32, 32, 1, None, 1, 1), 36 | nn.Conv2d(32, 32, 3, 1, 1) 37 | ) 38 | 39 | def forward(self, *input): 40 | if len(input) != 2: 41 | raise ValueError('expected input length 2 (got {} length input)'.format(len(input))) 42 | l_img, r_img = input 43 | 44 | l_fms = self.backbone(l_img) 45 | r_fms = self.backbone(r_img) 46 | 47 | return l_fms, r_fms 48 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/backbones/StereoNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv_bn, conv_bn_relu, BasicBlock 6 | 7 | class DownsampleHead(nn.Module): 8 | """ 9 | Args: 10 | in_planes (int): the channels of input 11 | out_planes (int): the channels of output 12 | batchNorm, (bool): whether use batch normalization layer, default True 13 | Inputs: 14 | x, (tensor): feature in (BatchSize, in_planes, Height, Width) layout 15 | Outputs: 16 | down_x, (tensor): downsampled feature in (BatchSize, out_planes, Height, Width) layout 17 | """ 18 | 19 | def __init__(self, in_planes, out_planes, batch_norm=True): 20 | super(DownsampleHead, self).__init__() 21 | 22 | self.in_planes = in_planes 23 | self.out_planes = out_planes 24 | self.batch_norm = batch_norm 25 | 26 | self.downsample = nn.Conv2d(in_planes, out_planes, kernel_size=5, 27 | stride=2, padding=2, bias=True) 28 | 29 | def 
forward(self, x): 30 | down_x = self.downsample(x) 31 | return down_x 32 | 33 | 34 | class StereoNetBackbone(nn.Module): 35 | """ 36 | Backbone proposed in StereoNet. 37 | Args: 38 | in_planes (int): the channels of input 39 | batch_norm (bool): whether use batch normalization layer, default True 40 | downsample_num (int): the number of downsample module, 41 | the input RGB image will be downsample to 1/2^num resolution, default 3, i.e., 1/8 resolution 42 | residual_num (int): the number of residual blocks, used for robust feature extraction 43 | Inputs: 44 | l_img (Tensor): left image, in [BatchSize, 3, Height, Width] layout 45 | r_img (Tensor): right image, in [BatchSize, 3, Height, Width] layout 46 | Outputs: 47 | l_fms (Tensor): left image feature maps, in [BatchSize, 32, Height//8, Width//8] layout 48 | r_fms (Tensor): right image feature maps, in [BatchSize, 32, Height//8, Width//8] layout 49 | """ 50 | 51 | def __init__(self, in_planes=3, batch_norm=True, downsample_num=3, residual_num=6): 52 | super(StereoNetBackbone, self).__init__() 53 | self.in_planes = in_planes 54 | self.batch_norm = batch_norm 55 | self.downsample_num = downsample_num 56 | self.residual_num = residual_num 57 | 58 | # Continuously downsample the input RGB image to 1/2^num resolution 59 | in_planes = self.in_planes 60 | out_planes = 32 61 | 62 | self.downsample = nn.ModuleList() 63 | for _ in range(self.downsample_num): 64 | self.downsample.append(DownsampleHead(in_planes, out_planes)) 65 | in_planes = out_planes 66 | out_planes = 32 67 | 68 | # Build residual feature extraction module 69 | self.residual_blocks = nn.ModuleList() 70 | for _ in range(self.residual_num): 71 | self.residual_blocks.append(BasicBlock( 72 | self.batch_norm, 32, 32, stride=1, downsample=None, padding=1, dilation=1 73 | )) 74 | 75 | self.lastconv = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1, bias=True) 76 | 77 | 78 | def _forward(self, x): 79 | 80 | for i in range(self.downsample_num): 81 | x = self.downsample[i](x) 82 | 83 | for i in range(self.residual_num): 84 | x = self.residual_blocks[i](x) 85 | 86 | output_feature = self.lastconv(x) 87 | 88 | return output_feature 89 | 90 | def forward(self, *input): 91 | if len(input) != 2: 92 | raise ValueError('expected input length 2 (got {} length input)'.format(len(input))) 93 | 94 | l_img, r_img = input 95 | 96 | l_fms = self._forward(l_img) 97 | r_fms = self._forward(r_img) 98 | 99 | return l_fms, r_fms 100 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import build_backbone 2 | 3 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/backbones/backbones.py: -------------------------------------------------------------------------------- 1 | from .GCNet import GCNetBackbone 2 | from .PSMNet import PSMNetBackbone 3 | from .StereoNet import StereoNetBackbone 4 | from .DeepPruner import DeepPrunerBestBackbone, DeepPrunerFastBackbone 5 | from .AnyNet import AnyNetBackbone 6 | 7 | BACKBONES = { 8 | 'GCNet': GCNetBackbone, 9 | 'PSMNet': PSMNetBackbone, 10 | 'StereoNet': StereoNetBackbone, 11 | 'BestDeepPruner': DeepPrunerBestBackbone, 12 | 'FastDeepPruner': DeepPrunerFastBackbone, 13 | 'AnyNet': AnyNetBackbone, 14 | } 15 | 16 | def build_backbone(cfg): 17 | backbone_type = cfg.model.backbone.type 18 | 19 | assert backbone_type in BACKBONES, \ 20 | "model 
backbone type not found, expected: {}," \ 21 | "but got {}".format(BACKBONES.keys(), backbone_type) 22 | 23 | default_args = cfg.model.backbone.copy() 24 | default_args.pop('type') 25 | default_args.update(batch_norm=cfg.model.batch_norm) 26 | 27 | backbone = BACKBONES[backbone_type](**default_args) 28 | 29 | return backbone 30 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/backbones/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/dmb/modeling/stereo/backbones/utils/__init__.py -------------------------------------------------------------------------------- /dmb/modeling/stereo/cmn/__init__.py: -------------------------------------------------------------------------------- 1 | from .cmn import build_cmn 2 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cmn/cmn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv_bn_relu 6 | 7 | from .loss import make_cmn_loss_evaluator 8 | 9 | 10 | class ConfHead(nn.Module): 11 | """ 12 | Args: 13 | in_planes (int): the channels of the cost volume used to calculate the confidence map 14 | batch_norm, (bool): whether use batch normalization layer, default True 15 | Inputs: 16 | cost, (tensor): cost volume in (BatchSize, in_planes, Height, Width) layout 17 | Outputs: 18 | confCost, (tensor): in (BatchSize, 1, Height, Width) layout 19 | """ 20 | 21 | def __init__(self, in_planes, batch_norm=True): 22 | super(ConfHead, self).__init__() 23 | 24 | self.in_planes = in_planes 25 | self.sec_in_planes = int(self.in_planes // 3) 26 | self.sec_in_planes = self.sec_in_planes if self.sec_in_planes > 0 else 1 27 | 28 | self.conf_net = nn.Sequential( 29 | conv_bn_relu(batch_norm, self.in_planes, self.sec_in_planes, 3, 1, 1, bias=False), 30 | nn.Conv2d(self.sec_in_planes, 1, 1, 1, 0, bias=False) 31 | ) 32 | 33 | def forward(self, cost): 34 | conf = self.conf_net(cost) 35 | return conf 36 | 37 | 38 | # confidence measure network 39 | class Cmn(nn.Module): 40 | 41 | def __init__(self, cfg, in_planes, num, alpha, beta): 42 | super(Cmn, self).__init__() 43 | self.cfg = cfg.copy() 44 | 45 | batch_norm = self.cfg.model.batch_norm 46 | conf_heads = nn.ModuleList( 47 | [ConfHead(in_planes, batch_norm) for _ in range(num)] 48 | ) 49 | loss_evaluator = make_cmn_loss_evaluator(cfg) 50 | 51 | self.alpha = alpha 52 | self.beta = beta 53 | 54 | self.conf_heads = conf_heads 55 | self.loss_evaluator = loss_evaluator 56 | 57 | def get_confidence(self, costs): 58 | assert len(self.conf_heads) == len(costs), "NUM of confidence heads({}) must be equal to NUM " \ 59 | "of cost volumes({})".format(len(self.conf_heads), len(costs)) 60 | 61 | # for convenience of using log-sigmoid when calculating the loss, 62 | # we don't directly convert the confidence cost to confidence by sigmoid 63 | conf_costs = [conf_head(cost) for cost, conf_head in zip(costs, self.conf_heads)] 64 | # convert to confidence 65 | confs = [torch.sigmoid(conf_cost) for conf_cost in conf_costs] 66 | # calculate variance modulated by confidence 67 | cost_vars = [self.alpha * (1 - conf) + self.beta for conf in confs] 68 | 69 | return confs, cost_vars, conf_costs 70 |
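    # NOTE (editor): a worked example of the variance modulation above, with
    # illustrative numbers (alpha and beta come from the config; alpha=1.0,
    # beta=0.1 here are assumptions): a confident pixel with conf=0.9 gets
    # variance 1.0 * (1 - 0.9) + 0.1 = 0.2, while an unconfident pixel with
    # conf=0.1 gets 1.0 * (1 - 0.1) + 0.1 = 1.0, i.e. low confidence widens
    # the modulated cost distribution.
71 | def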
get_loss(self, confs, target=None): 72 | cm_losses = self.loss_evaluator(confs, target) 73 | 74 | return cm_losses 75 | 76 | def forward(self, costs, target=None): 77 | confs, cost_vars, conf_costs = self.get_confidence(costs) 78 | 79 | if self.training: 80 | cm_losses = self.get_loss(conf_costs, target) 81 | return cost_vars, cm_losses 82 | else: 83 | return cost_vars, confs 84 | 85 | 86 | def build_cmn(cfg): 87 | in_planes = cfg.model.cmn.in_planes 88 | num = cfg.model.cmn.num 89 | alpha = cfg.model.cmn.alpha 90 | beta = cfg.model.cmn.beta 91 | 92 | return Cmn(cfg, in_planes, num, alpha, beta) 93 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cmn/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | 4 | from dmb.modeling.stereo.losses.conf_nll_loss import ConfidenceNllLoss 5 | 6 | 7 | def make_conf_nll_loss_evaluator(cfg): 8 | default_args = cfg.model.cmn.losses.nll_loss.copy() 9 | default_args.update(sparse=cfg.data.sparse) 10 | default_args.pop('weight') 11 | 12 | return ConfidenceNllLoss(**default_args) 13 | 14 | 15 | class CMNLossEvaluator(object): 16 | def __init__(self, cfg, loss_evaluators, loss_weights): 17 | self.cfg = cfg.copy() 18 | self.loss_evaluators = loss_evaluators 19 | self.loss_weights = loss_weights 20 | 21 | def __call__(self, confs, target): 22 | loss_dict = dict() 23 | 24 | for loss_name, loss_evaluator in self.loss_evaluators.items(): 25 | weight = self.loss_weights[loss_name] 26 | if isinstance(loss_evaluator, ConfidenceNllLoss): 27 | conf_nll_loss_dict = loss_evaluator(confs, target) 28 | conf_nll_loss_dict = {k: v * weight for k, v in conf_nll_loss_dict.items()} 29 | loss_dict.update(conf_nll_loss_dict) 30 | else: 31 | raise ValueError("{} not implemented.".format(loss_name)) 32 | 33 | return loss_dict 34 | 35 | 36 | def make_cmn_loss_evaluator(cfg): 37 | loss_evaluators = dict() 38 | loss_weights = dict() 39 | 40 | if "nll_loss" in cfg.model.cmn.losses: 41 | conf_nll_loss_evaluator = make_conf_nll_loss_evaluator(cfg) 42 | loss_evaluators["conf_nll_loss"] = conf_nll_loss_evaluator 43 | loss_weights["conf_nll_loss"] = cfg.model.cmn.losses.nll_loss.weight 44 | 45 | return CMNLossEvaluator( 46 | cfg, loss_evaluators, loss_weights 47 | ) 48 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/conf_measure/__init__.py: -------------------------------------------------------------------------------- 1 | from .conf_net import ConfidenceEstimation 2 | from .calc_conf import pkrConf, apkrConf, nlmConf 3 | from .gen_conf import ConfGenerator 4 | 5 | __all__ = [ 6 | 'ConfidenceEstimation', 7 | 'pkrConf', 'apkrConf', 'nlmConf', 8 | 'ConfGenerator', 9 | ] 10 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/conf_measure/conf_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv_bn_relu 6 | 7 | 8 | class ConfidenceEstimation(nn.Module): 9 | """ 10 | Args: 11 | in_planes, (int): usually cost volume used to calculate confidence map with $in_planes$ in Channel Dimension 12 | batchNorm, (bool): whether use batch normalization layer, default True 13 | Inputs: 14 | cost, (tensor): cost volume in (BatchSize, in_planes, Height, Width) layout 15 | 
Outputs: 16 | confCost, (tensor): in (BatchSize, 1, Height, Width) layout 17 | """ 18 | 19 | def __init__(self, in_planes, batchNorm=True): 20 | super(ConfidenceEstimation, self).__init__() 21 | 22 | self.in_planes = in_planes 23 | self.sec_in_planes = int(self.in_planes // 3) 24 | self.sec_in_planes = self.sec_in_planes if self.sec_in_planes > 0 else 1 25 | 26 | self.conf_net = nn.Sequential( 27 | conv_bn_relu(batchNorm, self.in_planes, self.sec_in_planes, 3, 1, 1, bias=False), 28 | nn.Conv2d(self.sec_in_planes, 1, 1, 1, 0, bias=False) 29 | ) 30 | 31 | def forward(self, cost): 32 | assert cost.shape[1] == self.in_planes 33 | 34 | confCost = self.conf_net(cost) 35 | 36 | return confCost 37 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/conf_measure/gen_conf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class ConfGenerator(nn.Module): 6 | """ 7 | Implementation of Confidence ground-truth label generation 8 | Args: 9 | gtDisp: tensor, in (Height, Width) or (BatchSize, Height, Width) or (BatchSize, 1, Height, Width) layout 10 | estDisp: tensor, in (Height, Width) or (BatchSize, Height, Width) or (BatchSize, 1, Height, Width) layout 11 | theta: a threshold parameter to compare the ground-truth disparity map and the estimated disparity map 12 | Outputs: 13 | confidence_gt_label, in (BatchSize, 1, Height, Width) layout 14 | """ 15 | 16 | def __init__(self, theta): 17 | super(ConfGenerator, self).__init__() 18 | 19 | if not isinstance(theta, (int, float)): 20 | raise TypeError('(int,float) is expected, got {}'.format(type(theta))) 21 | 22 | self.theta = theta 23 | 24 | def forward(self, estDisp, gtDisp): 25 | 26 | if not torch.is_tensor(gtDisp): 27 | raise TypeError('ground truth disparity map is expected to be tensor, got {}'.format(type(gtDisp))) 28 | if not torch.is_tensor(estDisp): 29 | raise TypeError('estimated disparity map is expected to be tensor, got {}'.format(type(estDisp))) 30 | 31 | assert estDisp.shape == gtDisp.shape 32 | 33 | if gtDisp.dim() == 2: # single image H x W 34 | h, w = gtDisp.size(0), gtDisp.size(1) 35 | gtDisp = gtDisp.view(1, 1, h, w) 36 | estDisp = estDisp.view(1, 1, h, w) 37 | 38 | if gtDisp.dim() == 3: # multi image B x H x W 39 | b, h, w = gtDisp.size(0), gtDisp.size(1), gtDisp.size(2) 40 | gtDisp = gtDisp.view(b, 1, h, w) 41 | estDisp = estDisp.view(b, 1, h, w) 42 | 43 | if gtDisp.dim() == 4: 44 | if gtDisp.size(1) == 1: # mult image B x 1 x H x W 45 | self.gtDisp = gtDisp 46 | self.estDisp = estDisp 47 | else: 48 | raise ValueError('2nd dimension size should be 1, got {}'.format(gtDisp.size(1))) 49 | 50 | confidence_gt_label = torch.lt(torch.abs(self.estDisp - self.gtDisp), self.theta).type_as(self.gtDisp) 51 | 52 | return confidence_gt_label 53 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/AnyNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .utils.dif_fms import fast_dif_fms 6 | from .aggregators.AnyNet import AnyNetAggregator 7 | 8 | class AnyNetProcessor(nn.Module): 9 | """ 10 | An implementation of cost procession in AnyNet 11 | 12 | Inputs: 13 | stage, (str): 'init_guess', the coarsest disparity estimation, 14 | 'warp_level_8', refine the disparity estimation with feature warp at resolution=1/8 15 | 
'warp_level_4', refine the disparity estimation with feature warp at resolution=1/4 16 | left, (tensor): Left image feature, in [BatchSize, Channels, Height, Width] layout 17 | right, (tensor): Right image feature, in [BatchSize, Channels, Height, Width] layout 18 | disp, (tensor): Disparity map output by the last stage, in [BatchSize, 1, Height, Width] layout 19 | 20 | Outputs: 21 | cost_volume (tuple of Tensor): cost volume 22 | in [BatchSize, MaxDisparity, Height, Width] layout 23 | 24 | """ 25 | 26 | def __init__(self, cfg): 27 | super(AnyNetProcessor, self).__init__() 28 | self.cfg = cfg.copy() 29 | self.batch_norm = cfg.model.batch_norm 30 | 31 | self.stage = self.cfg.model.stage 32 | 33 | # cost computation parameters, dict 34 | self.max_disp = self.cfg.model.cost_processor.cost_computation.max_disp 35 | self.start_disp = self.cfg.model.cost_processor.cost_computation.start_disp 36 | self.dilation = self.cfg.model.cost_processor.cost_computation.dilation 37 | 38 | 39 | # cost aggregation 40 | self.aggregator_type = self.cfg.model.cost_processor.cost_aggregator.type 41 | self.aggregator = nn.ModuleDict() 42 | for st in self.stage: 43 | self.aggregator[st] = AnyNetAggregator( 44 | in_planes=self.cfg.model.cost_processor.cost_aggregator.in_planes[st], 45 | agg_planes=self.cfg.model.cost_processor.cost_aggregator.agg_planes[st], 46 | num=self.cfg.model.cost_processor.cost_aggregator.num, 47 | batch_norm=self.batch_norm, 48 | ) 49 | 50 | def forward(self, stage, left, right, disp=None): 51 | B, C, H, W = left.shape 52 | # construct the raw cost volume 53 | 54 | end_disp = self.start_disp[stage] + self.max_disp[stage] - 1 55 | 56 | # disparity sample number 57 | D = (self.max_disp[stage] + self.dilation[stage] - 1) // self.dilation[stage] 58 | 59 | # generate disparity samples, in [B, D, H, W] layout 60 | disp_sample = torch.linspace(self.start_disp[stage], end_disp, D) 61 | disp_sample = disp_sample.view(1, D, 1, 1).expand(B, D, H, W).to(left.device).float() 62 | 63 | # if an initial disparity was estimated, use it for warping 64 | if disp is not None: 65 | # up-sample disparity map to the size of left 66 | H, W = left.shape[-2:] 67 | scale = W / disp.shape[-1] 68 | disp = F.interpolate(disp * scale, size=(H, W), mode='bilinear', align_corners=False) 69 | # shift the disparity sample to be centered at the given disparity map 70 | disp_sample = disp_sample + disp 71 | 72 | # [B, C, D, H, W] 73 | raw_cost = fast_dif_fms(left, right, disp_sample=disp_sample) 74 | 75 | # list [[B, D, H, W]] 76 | cost = self.aggregator[stage](raw_cost) 77 | 78 | return cost 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_cost_processor 2 | --------------------------------------------------------------------------------
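# NOTE (editor): a worked example of the disparity sampling arithmetic in
# AnyNetProcessor.forward above; the numbers are illustrative, not from a
# shipped config. With start_disp=0, max_disp=12 and dilation=1 for one stage:
#
#     end_disp = 0 + 12 - 1 = 11
#     D = (12 + 1 - 1) // 1 = 12
#     disp_sample = linspace(0, 11, 12) -> 0, 1, ..., 11
#
# When a previous-stage disparity map is given, the same D samples are simply
# re-centered around it (disp_sample + disp) after up-sampling, so later
# stages only search a narrow band around the current estimate.
/dmb/modeling/stereo/cost_processors/aggregators/AcfNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from dmb.modeling.stereo.layers.basic_layers import conv3d_bn, conv3d_bn_relu, conv_bn_relu, conv_bn 5 | from dmb.modeling.stereo.cost_processors.utils.hourglass import Hourglass 6 | 7 | 8 | class AcfAggregator(nn.Module): 9 | """ 10 | Args: 11 | max_disp (int): max disparity 12 | in_planes (int): the channels of raw cost volume 13 | batch_norm (bool): whether use batch normalization layer, default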
True 14 | 15 | Inputs: 16 | raw_cost (Tensor): raw cost volume, 17 | in [BatchSize, Channels, MaxDisparity//4, Height//4, Width//4] layout 18 | 19 | Outputs: 20 | cost_volume (tuple of Tensor): cost volume 21 | in [BatchSize, MaxDisparity, Height, Width] layout 22 | """ 23 | 24 | def __init__(self, max_disp, in_planes=64, batch_norm=True): 25 | super(AcfAggregator, self).__init__() 26 | self.max_disp = max_disp 27 | self.in_planes = in_planes 28 | self.batch_norm = batch_norm 29 | 30 | self.dres0 = nn.Sequential( 31 | conv3d_bn_relu(batch_norm, self.in_planes, 32, 3, 1, 1), 32 | conv3d_bn_relu(batch_norm, 32, 32, 3, 1, 1), 33 | ) 34 | self.dres1 = nn.Sequential( 35 | conv3d_bn_relu(batch_norm, 32, 32, 3, 1, 1), 36 | conv3d_bn(batch_norm, 32, 32, 3, 1, 1) 37 | ) 38 | self.dres2 = Hourglass(in_planes=32, batch_norm=batch_norm) 39 | self.dres3 = Hourglass(in_planes=32, batch_norm=batch_norm) 40 | self.dres4 = Hourglass(in_planes=32, batch_norm=batch_norm) 41 | 42 | self.classif1 = nn.Sequential( 43 | conv3d_bn_relu(batch_norm, 32, 32, 3, 1, 1), 44 | nn.Conv3d(32, 1, kernel_size=3, stride=1, padding=1, bias=False), 45 | ) 46 | self.classif2 = nn.Sequential( 47 | conv3d_bn_relu(batch_norm, 32, 32, 3, 1, 1), 48 | nn.Conv3d(32, 1, kernel_size=3, stride=1, padding=1, bias=False), 49 | ) 50 | self.classif3 = nn.Sequential( 51 | conv3d_bn_relu(batch_norm, 32, 32, 3, 1, 1), 52 | nn.Conv3d(32, 1, kernel_size=3, stride=1, padding=1, bias=False) 53 | ) 54 | 55 | self.deconv1 = nn.ConvTranspose3d(1, 1, 8, 4, 2, bias=False) 56 | self.deconv2 = nn.ConvTranspose3d(1, 1, 8, 4, 2, bias=False) 57 | self.deconv3 = nn.ConvTranspose3d(1, 1, 8, 4, 2, bias=False) 58 | 59 | def forward(self, raw_cost): 60 | B, C, D, H, W = raw_cost.shape 61 | # concat_fms: (BatchSize, Channels*2, MaxDisparity/4, Height/4, Width/4) 62 | cost0 = self.dres0(raw_cost) 63 | cost0 = self.dres1(cost0) + cost0 64 | 65 | out1, pre1, post1 = self.dres2(cost0, None, None) 66 | out1 = out1 + cost0 67 | 68 | out2, pre2, post2 = self.dres3(out1, pre1, post1) 69 | out2 = out2 + cost0 70 | 71 | out3, pre3, post3 = self.dres4(out2, pre2, post2) 72 | out3 = out3 + cost0 73 | 74 | cost1 = self.classif1(out1) 75 | cost2 = self.classif2(out2) + cost1 76 | cost3 = self.classif3(out3) + cost2 77 | 78 | # (BatchSize, 1, MaxDisparity, Height, Width) 79 | full_h, full_w = H * 4, W * 4 80 | 81 | cost1 = self.deconv1(cost1, [self.max_disp, full_h, full_w]) 82 | cost2 = self.deconv2(cost2, [self.max_disp, full_h, full_w]) 83 | cost3 = self.deconv3(cost3, [self.max_disp, full_h, full_w]) 84 | 85 | # (BatchSize, MaxDisparity, Height, Width) 86 | cost1 = torch.squeeze(cost1, 1) 87 | cost2 = torch.squeeze(cost2, 1) 88 | cost3 = torch.squeeze(cost3, 1) 89 | 90 | return [cost3, cost2, cost1] 91 | 92 | 93 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/aggregators/AnyNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from dmb.modeling.stereo.layers.basic_layers import bn_relu_conv3d 5 | 6 | 7 | class AnyNetAggregator(nn.Module): 8 | """ 9 | Args: 10 | in_planes (int): the channels of raw cost volume 11 | agg_planes (int): the channels of middle 3d convolution layer 12 | num, (int): the number of middle 3d convolution layer 13 | batch_norm (bool): whether use batch normalization layer, default True 14 | 15 | Inputs: 16 | raw_cost (Tensor): raw cost volume, 17 | in [BatchSize, in_planes, MaxDisparity, Height, 
Width] layout 18 | 19 | Outputs: 20 | cost_volume (tuple of Tensor): cost volume 21 | in [BatchSize, MaxDisparity, Height, Width] layout 22 | """ 23 | 24 | def __init__(self, in_planes=1, agg_planes=4, num=4, batch_norm=True): 25 | super(AnyNetAggregator, self).__init__() 26 | self.in_planes = in_planes 27 | self.agg_planes = agg_planes 28 | self.num = num 29 | self.batch_norm = batch_norm 30 | 31 | self.agg_list = [bn_relu_conv3d(batch_norm, in_planes, agg_planes, kernel_size=3, 32 | stride=1, padding=1, dilation=1, bias=True)] 33 | self.agg_list += [bn_relu_conv3d(batch_norm, agg_planes, agg_planes, kernel_size=3, 34 | stride=1, padding=1, dilation=1, bias=True) for _ in range(num)] 35 | self.agg_list += [bn_relu_conv3d(batch_norm, agg_planes, 1, kernel_size=3, 36 | stride=1, padding=1, dilation=1, bias=True)] 37 | self.agg = nn.Sequential(*self.agg_list) 38 | 39 | def forward(self, raw_cost): 40 | # in: [B, in_planes, D, H, W], out: [B, 1, D, H, W] 41 | cost = self.agg(raw_cost) 42 | # [B, D, H, W] 43 | cost = cost.squeeze(dim=1) 44 | 45 | return [cost] 46 | 47 | 48 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/aggregators/DeepPruner.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from dmb.modeling.stereo.layers.basic_layers import conv3d_bn, conv3d_bn_relu 5 | from dmb.modeling.stereo.cost_processors.utils.hw_hourglass import HWHourglass 6 | 7 | 8 | class DeepPrunerAggregator(nn.Module): 9 | """ 10 | Args: 11 | in_planes (int): the channels of raw cost volume 12 | hourglass_in_planes (int): the channels of hourglass module for cost aggregation 13 | batch_norm (bool): whether use batch normalization layer, default True 14 | 15 | Inputs: 16 | raw_cost (Tensor): raw cost volume, in [BatchSize, in_planes, MaxDisparity, Height, Width] layout 17 | 18 | Outputs: 19 | cost_volume (tuple of Tensor): cost volume 20 | in [BatchSize, MaxDisparity, Height, Width] layout 21 | """ 22 | 23 | def __init__(self, in_planes, hourglass_in_planes, batch_norm=True): 24 | super(DeepPrunerAggregator, self).__init__() 25 | self.in_planes = in_planes 26 | self.hourglass_in_planes = hourglass_in_planes 27 | self.batch_norm = batch_norm 28 | 29 | self.dres0 = nn.Sequential( 30 | conv3d_bn_relu(batch_norm, in_planes, 64, kernel_size=3, stride=1, padding=1, bias=False), 31 | conv3d_bn_relu(batch_norm, 64, 32, kernel_size=3, stride=1, padding=1, bias=False), 32 | ) 33 | 34 | self.dres1 = nn.Sequential( 35 | conv3d_bn_relu(batch_norm, 32, 32, kernel_size=3, stride=1, padding=1, bias=False), 36 | conv3d_bn_relu(batch_norm, 32, hourglass_in_planes, kernel_size=3, stride=1, padding=1, bias=False), 37 | ) 38 | 39 | self.dres2 = HWHourglass(hourglass_in_planes, batch_norm=batch_norm) 40 | 41 | self.classify = nn.Sequential( 42 | conv3d_bn_relu(batch_norm, hourglass_in_planes, hourglass_in_planes * 2, 43 | kernel_size=3, stride=1, padding=1, bias=False), 44 | nn.Conv3d(hourglass_in_planes * 2, 1, kernel_size=3, stride=1, padding=1, bias=False) 45 | ) 46 | 47 | def forward(self, raw_cost): 48 | # in: [B, in_planes, D, H, W], out: [B, 64, D, H, W] 49 | cost = self.dres0(raw_cost) 50 | # in: [B, 64, D, H, W], out: [B, hourglass_in_planes, D, H, W] 51 | cost = self.dres1(cost) 52 | 53 | # in: [B, hourglass_in_planes, D, H, W], out: [B, hourglass_in_planes, D, H, W] 54 | cost = self.dres2(cost) + cost 55 | 56 | # in: [B, hourglass_in_planes, D, H, W], mid: [B, 1, D, H, 
W], out: [B, D, H, W] 57 | cost = self.classify(cost).squeeze(1) 58 | 59 | return [cost] 60 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/aggregators/PSMNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv3d_bn, conv3d_bn_relu 6 | from dmb.modeling.stereo.cost_processors.utils.hourglass import Hourglass 7 | 8 | 9 | class PSMAggregator(nn.Module): 10 | """ 11 | Args: 12 | max_disp (int): max disparity 13 | in_planes (int): the channels of raw cost volume 14 | batch_norm (bool): whether use batch normalization layer, default True 15 | 16 | Inputs: 17 | raw_cost (Tensor): concatenation-based cost volume without further processing, 18 | in [BatchSize, in_planes, MaxDisparity//4, Height//4, Width//4] layout 19 | Outputs: 20 | cost_volume (tuple of Tensor): cost volume 21 | in [BatchSize, MaxDisparity, Height, Width] layout 22 | """ 23 | 24 | def __init__(self, max_disp, in_planes=64, batch_norm=True): 25 | super(PSMAggregator, self).__init__() 26 | self.max_disp = max_disp 27 | self.in_planes = in_planes 28 | self.batch_norm = batch_norm 29 | 30 | self.dres0 = nn.Sequential( 31 | conv3d_bn_relu(batch_norm, self.in_planes, 32, 3, 1, 1, bias=False), 32 | conv3d_bn_relu(batch_norm, 32, 32, 3, 1, 1, bias=False), 33 | ) 34 | self.dres1 = nn.Sequential( 35 | conv3d_bn_relu(batch_norm, 32, 32, 3, 1, 1, bias=False), 36 | conv3d_bn(batch_norm, 32, 32, 3, 1, 1, bias=False) 37 | ) 38 | self.dres2 = Hourglass(in_planes=32, batch_norm=batch_norm) 39 | self.dres3 = Hourglass(in_planes=32, batch_norm=batch_norm) 40 | self.dres4 = Hourglass(in_planes=32, batch_norm=batch_norm) 41 | 42 | self.classif1 = nn.Sequential( 43 | conv3d_bn_relu(batch_norm, 32, 32, 3, 1, 1, bias=False), 44 | nn.Conv3d(32, 1, kernel_size=3, stride=1, padding=1, bias=False), 45 | ) 46 | self.classif2 = nn.Sequential( 47 | conv3d_bn_relu(batch_norm, 32, 32, 3, 1, 1, bias=False), 48 | nn.Conv3d(32, 1, kernel_size=3, stride=1, padding=1, bias=False), 49 | ) 50 | self.classif3 = nn.Sequential( 51 | conv3d_bn_relu(batch_norm, 32, 32, 3, 1, 1, bias=False), 52 | nn.Conv3d(32, 1, kernel_size=3, stride=1, padding=1, bias=False) 53 | ) 54 | 55 | def forward(self, raw_cost): 56 | B, C, D, H, W = raw_cost.shape 57 | # raw_cost: (BatchSize, Channels*2, MaxDisparity/4, Height/4, Width/4) 58 | cost0 = self.dres0(raw_cost) 59 | cost0 = self.dres1(cost0) + cost0 60 | 61 | out1, pre1, post1 = self.dres2(cost0, None, None) 62 | out1 = out1 + cost0 63 | 64 | out2, pre2, post2 = self.dres3(out1, pre1, post1) 65 | out2 = out2 + cost0 66 | 67 | out3, pre3, post3 = self.dres4(out2, pre2, post2) 68 | out3 = out3 + cost0 69 | 70 | cost1 = self.classif1(out1) 71 | cost2 = self.classif2(out2) + cost1 72 | cost3 = self.classif3(out3) + cost2 73 | 74 | # (BatchSize, 1, max_disp, Height, Width) 75 | full_h, full_w = H * 4, W * 4 76 | align_corners = True 77 | cost1 = F.interpolate( 78 | cost1, [self.max_disp, full_h, full_w], 79 | mode='trilinear', align_corners=align_corners 80 | ) 81 | cost2 = F.interpolate( 82 | cost2, [self.max_disp, full_h, full_w], 83 | mode='trilinear', align_corners=align_corners 84 | ) 85 | cost3 = F.interpolate( 86 | cost3, [self.max_disp, full_h, full_w], 87 | mode='trilinear', align_corners=align_corners 88 | ) 89 | 90 | # (BatchSize, max_disp, Height, Width) 91 | cost1 = 
torch.squeeze(cost1, 1) 92 | cost2 = torch.squeeze(cost2, 1) 93 | cost3 = torch.squeeze(cost3, 1) 94 | 95 | return [cost3, cost2, cost1] 96 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/aggregators/StereoNet.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from dmb.modeling.stereo.layers.basic_layers import conv3d_bn, conv3d_bn_relu 7 | 8 | 9 | class StereoNetAggregator(nn.Module): 10 | """ 11 | Args: 12 | max_disp (int): max disparity 13 | in_planes (int): the channels of raw cost volume 14 | batch_norm (bool): whether use batch normalization layer, default True 15 | 16 | Inputs: 17 | raw_cost (Tensor): difference-based cost volume without further processing, 18 | in [BatchSize, in_planes, max_disp//8, Height//8, Width//8] layout (default) 19 | or in [BatchSize, in_planes, max_disp//16, Height//16, Width//16] layout 20 | 21 | Outputs: 22 | cost_volume (tuple of Tensor): cost volume 23 | in [BatchSize, max_disp//8, Height//8, Width//8] layout (default) 24 | or in [BatchSize, in_planes, max_disp//16, Height//16, Width//16] layout 25 | """ 26 | 27 | def __init__(self, max_disp, in_planes=32, batch_norm=True, num=4): 28 | super(StereoNetAggregator, self).__init__() 29 | self.max_disp = max_disp 30 | self.in_planes = in_planes 31 | self.batch_norm = batch_norm 32 | self.num = num 33 | 34 | self.classify = nn.ModuleList([ 35 | conv3d_bn_relu(self.batch_norm, in_planes, 32, kernel_size=3, 36 | stride=1, padding=1, dilation=1, bias=True) for _ in range(self.num) 37 | ]) 38 | 39 | self.lastconv = nn.Conv3d(32, 1, kernel_size=3, stride=1, padding=1, bias=True) 40 | 41 | 42 | def forward(self, raw_cost): 43 | # default down-sample to 1/8 resolution, it also can be 1/16 44 | # raw_cost: (BatchSize, Channels, MaxDisparity/8, Height/8, Width/8) 45 | for i in range(self.num): 46 | raw_cost = self.classify[i](raw_cost) 47 | 48 | # cost: (BatchSize, 1, MaxDisparity/8, Height/8, Width/8) 49 | cost = self.lastconv(raw_cost) 50 | 51 | # (BatchSize, MaxDisparity/8, Height/8, Width/8) 52 | cost = torch.squeeze(cost, 1) 53 | 54 | 55 | return [cost] 56 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/aggregators/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_cost_aggregator 2 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/aggregators/builder.py: -------------------------------------------------------------------------------- 1 | from .GCNet import GCAggregator 2 | from .PSMNet import PSMAggregator 3 | from .AcfNet import AcfAggregator 4 | from .StereoNet import StereoNetAggregator 5 | from .DeepPruner import DeepPrunerAggregator 6 | from .AnyNet import AnyNetAggregator 7 | 8 | AGGREGATORS = { 9 | "GCNet": GCAggregator, 10 | "PSMNet": PSMAggregator, 11 | "AcfNet": AcfAggregator, 12 | 'StereoNet': StereoNetAggregator, 13 | 'DeepPruner': DeepPrunerAggregator, 14 | 'AnyNet': AnyNetAggregator, 15 | } 16 | 17 | 18 | def build_cost_aggregator(cfg): 19 | agg_type = cfg.model.cost_processor.cost_aggregator.type 20 | assert agg_type in AGGREGATORS, "cost_aggregator type not found, excepted: {}," \ 21 | "but got {}".format(AGGREGATORS.keys(), agg_type) 22 | 23 | default_args = 
cfg.model.cost_processor.cost_aggregator.copy() 24 | default_args.pop('type') 25 | default_args.update(batch_norm=cfg.model.batch_norm) 26 | 27 | aggregator = AGGREGATORS[agg_type](**default_args) 28 | 29 | return aggregator 30 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/builder.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from .utils.cat_fms import CAT_FUNCS 4 | from .utils.dif_fms import DIF_FUNCS 5 | from .utils.correlation1d_cost import COR_FUNCS 6 | from .aggregators import build_cost_aggregator 7 | 8 | from .DeepPruner import DeepPrunerProcessor 9 | from .AnyNet import AnyNetProcessor 10 | 11 | 12 | class CostProcessor(nn.Module): 13 | 14 | def __init__(self): 15 | super(CostProcessor, self).__init__() 16 | 17 | def forward(self, *input): 18 | raise NotImplementedError 19 | 20 | # Concatenate left and right feature to form cost volume 21 | class CatCostProcessor(CostProcessor): 22 | 23 | def __init__(self, cfg): 24 | super(CatCostProcessor, self).__init__() 25 | cat_func = cfg.model.cost_processor.cost_computation.get('type', 'default') 26 | self.cat_func = CAT_FUNCS[cat_func] 27 | 28 | self.default_args = cfg.model.cost_processor.cost_computation.copy() 29 | self.default_args.pop('type') 30 | 31 | self.aggregator = build_cost_aggregator(cfg) 32 | 33 | def forward(self, ref_fms, tgt_fms, disp_sample=None): 34 | # 1. build raw cost by concat 35 | cat_cost = self.cat_func(ref_fms, tgt_fms, disp_sample=disp_sample, **self.default_args) 36 | 37 | # 2. aggregate cost by 3D-hourglass 38 | costs = self.aggregator(cat_cost) 39 | 40 | return costs 41 | 42 | 43 | # Use the difference between left and right feature to form cost volume 44 | class DifCostProcessor(CostProcessor): 45 | 46 | def __init__(self, cfg): 47 | super(DifCostProcessor, self).__init__() 48 | dif_func = cfg.model.cost_processor.cost_computation.get('type', 'default') 49 | self.dif_func = DIF_FUNCS[dif_func] 50 | 51 | self.default_args = cfg.model.cost_processor.cost_computation.copy() 52 | self.default_args.pop('type') 53 | 54 | self.aggregator = build_cost_aggregator(cfg) 55 | 56 | def forward(self, ref_fms, tgt_fms, disp_sample=None): 57 | # 1. build raw cost by concat 58 | cat_cost = self.dif_func(ref_fms, tgt_fms, disp_sample=disp_sample, **self.default_args) 59 | 60 | # 2. aggregate cost by 3D-hourglass 61 | costs = self.aggregator(cat_cost) 62 | 63 | return costs 64 | 65 | 66 | # Use the correlation between left and right feature to form cost volume 67 | class CorCostProcessor(CostProcessor): 68 | 69 | def __init__(self, cfg): 70 | super(CorCostProcessor, self).__init__() 71 | cor_func = cfg.model.cost_processor.cost_computation.get('type', 'default') 72 | self.cor_func = COR_FUNCS[cor_func] 73 | 74 | self.default_args = cfg.model.cost_processor.cost_computation.copy() 75 | self.default_args.pop('type') 76 | 77 | self.aggregator = build_cost_aggregator(cfg) 78 | 79 | def forward(self, ref_fms, tgt_fms, disp_sample=None): 80 | # 1. build raw cost by correlation 81 | cor_cost = self.cor_func(ref_fms, tgt_fms, disp_sample=disp_sample, **self.default_args) 82 | 83 | # 2. 
aggregate cost by 2D-hourglass 84 | costs = self.aggregator(cor_cost) 85 | 86 | return costs 87 | 88 | 89 | PROCESSORS = { 90 | 'Difference': DifCostProcessor, 91 | 'Concatenation': CatCostProcessor, 92 | 'Correlation': CorCostProcessor, 93 | 'DeepPruner': DeepPrunerProcessor, 94 | 'AnyNet': AnyNetProcessor, 95 | } 96 | 97 | def build_cost_processor(cfg): 98 | proc_type = cfg.model.cost_processor.type 99 | assert proc_type in PROCESSORS, "cost_processor type not found, expected: {}," \ 100 | "but got {}".format(PROCESSORS.keys(), proc_type) 101 | 102 | args = dict( 103 | cfg=cfg, 104 | ) 105 | processor = PROCESSORS[proc_type](**args) 106 | 107 | return processor 108 | 109 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/dmb/modeling/stereo/cost_processors/utils/__init__.py -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/utils/cat_fms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | from dmb.modeling.stereo.layers.inverse_warp_3d import inverse_warp_3d 5 | 6 | 7 | def cat_fms(reference_fm, target_fm, max_disp=192, start_disp=0, dilation=1, disp_sample=None): 8 | """ 9 | Concat left and right in Channel dimension to form the raw cost volume. 10 | Args: 11 | max_disp, (int): under the scale of feature used, 12 | often equals to (end disp - start disp + 1), the maximum searching range of disparity 13 | start_disp (int): the start searching disparity index, usually 0 14 | dilation (int): the step between near disparity index 15 | disp_sample (Tensor, optional): pre-given disparity samples, unused here (see fast_cat_fms) 16 | 17 | Inputs: 18 | reference_fm, (Tensor): reference feature, i.e.
right image feature, in [BatchSize, Channel, Height, Width] layout 20 | 21 | Output: 22 | concat_fm, (Tensor): the formed cost volume, in [BatchSize, Channel*2, disp_sample_number, Height, Width] layout 23 | 24 | """ 25 | device = reference_fm.device 26 | N, C, H, W = reference_fm.shape 27 | 28 | end_disp = start_disp + max_disp - 1 29 | disp_sample_number = (max_disp + dilation - 1) // dilation 30 | disp_index = torch.linspace(start_disp, end_disp, disp_sample_number) 31 | 32 | concat_fm = torch.zeros(N, C * 2, disp_sample_number, H, W).to(device) 33 | idx = 0 34 | for i in disp_index: 35 | i = int(i) # convert torch.Tensor to int, so that it can be index 36 | if i > 0: 37 | concat_fm[:, :C, idx, :, i:] = reference_fm[:, :, :, i:] 38 | concat_fm[:, C:, idx, :, i:] = target_fm[:, :, :, :-i] 39 | elif i == 0: 40 | concat_fm[:, :C, idx, :, :] = reference_fm 41 | concat_fm[:, C:, idx, :, :] = target_fm 42 | else: 43 | concat_fm[:, :C, idx, :, :i] = reference_fm[:, :, :, :i] 44 | concat_fm[:, C:, idx, :, :i] = target_fm[:, :, :, abs(i):] 45 | idx = idx + 1 46 | 47 | concat_fm = concat_fm.contiguous() 48 | return concat_fm 49 | 50 | 51 | def fast_cat_fms(reference_fm, target_fm, max_disp=192, start_disp=0, dilation=1, disp_sample=None): 52 | device = reference_fm.device 53 | B, C, H, W = reference_fm.shape 54 | 55 | if disp_sample is None: 56 | end_disp = start_disp + max_disp - 1 57 | 58 | disp_sample_number = (max_disp + dilation - 1) // dilation 59 | D = disp_sample_number 60 | 61 | # generate disparity samples, in [B,D, H, W] layout 62 | disp_sample = torch.linspace(start_disp, end_disp, D) 63 | disp_sample = disp_sample.view(1, D, 1, 1).expand(B, D, H, W).to(device).float() 64 | 65 | else: # direct provide disparity samples 66 | # the number of disparity samples 67 | D = disp_sample.shape[1] 68 | 69 | # expand D dimension 70 | concat_reference_fm = reference_fm.unsqueeze(2).expand(B, C, D, H, W) 71 | concat_target_fm = target_fm.unsqueeze(2).expand(B, C, D, H, W) 72 | 73 | # shift target feature according to disparity samples 74 | concat_target_fm = inverse_warp_3d(concat_target_fm, -disp_sample, padding_mode='zeros') 75 | 76 | # mask out features in reference 77 | concat_reference_fm = concat_reference_fm * (concat_target_fm > 0).float() 78 | 79 | # [B, 2C, D, H, W) 80 | concat_fm = torch.cat((concat_reference_fm, concat_target_fm), dim=1) 81 | 82 | return concat_fm 83 | 84 | 85 | CAT_FUNCS = dict( 86 | default=cat_fms, 87 | fast_mode=fast_cat_fms, 88 | ) 89 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/utils/correlation1d_cost.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from spatial_correlation_sampler import SpatialCorrelationSampler 6 | 7 | def correlation1d_cost(reference_fm, target_fm, max_disp=192, start_disp=0, dilation=1, disp_sample=None, 8 | kernel_size=1, stride=1, padding=0, dilation_patch=1,): 9 | # for a pixel of left image at (x, y), it will calculates correlation cost volume 10 | # with pixel of right image at (xr, y), where xr in [x-max_disp, x+max_disp] 11 | # but we only need the left half part, i.e., [x-max_disp, 0] 12 | correlation_sampler = SpatialCorrelationSampler(patch_size=(1, max_disp * 2 - 1), 13 | kernel_size=kernel_size, 14 | stride=stride, padding=padding, 15 | dilation_patch=dilation_patch) 16 | # [B, 1, max_disp*2-1, H, W] 17 | out = 
correlation_sampler(reference_fm, target_fm) 18 | 19 | # [B, max_disp*2-1, H, W] 20 | out = out.squeeze(1) 21 | 22 | # [B, max_disp, H, W], grab the left half of the searching range 23 | out = out[:, :max_disp, :, :] 24 | 25 | cost = F.leaky_relu(out, negative_slope=0.1, inplace=True) 26 | 27 | return cost 28 | 29 | COR_FUNCS = dict( 30 | default=correlation1d_cost, 31 | ) 32 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/utils/cost_norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | eps = 1e-5 5 | 6 | 7 | class _CostVolumeNorm(nn.Module): 8 | """ 9 | Normalize Cost Volume 10 | Args: 11 | dim (int): which dim to apply normalization operation, default dim is for the cost dim. 12 | affine (bool): whether the parameters are learnable, default is True 13 | weight (float): weight for cost re-range 14 | bias (float): bias for cost 15 | Shape: 16 | - Input: :math:`(N, *)` 17 | - Output: :math:`(N, *)` (same shape as input) 18 | """ 19 | 20 | def __init__(self, dim=1, affine=True, weight=1, bias=0): 21 | super(_CostVolumeNorm, self).__init__() 22 | self.dim = dim 23 | self.affine = affine 24 | if self.affine: 25 | self.weight = nn.Parameter(data=torch.Tensor(1), requires_grad=True) 26 | self.bias = nn.Parameter(data=torch.Tensor(1), requires_grad=True) 27 | else: 28 | self.weight = nn.Parameter(data=torch.Tensor(1), requires_grad=False) 29 | self.bias = nn.Parameter(data=torch.Tensor(1), requires_grad=False) 30 | 31 | # init weight and bias 32 | self.weight.data.fill_(weight) 33 | self.bias.data.fill_(bias) 34 | 35 | def forward(self, input): 36 | raise NotImplementedError 37 | 38 | 39 | class RangeNorm(_CostVolumeNorm): 40 | def __init__(self, dim=1, affine=True, weight=1, bias=0): 41 | super(RangeNorm, self).__init__(dim=dim, affine=affine, weight=weight, bias=bias) 42 | 43 | def forward(self, input): 44 | # compute min value, used as the shift 45 | mean = input.min(dim=self.dim, keepdim=True)[0] 46 | # compute the range (max - min) 47 | var = input.max(dim=self.dim, keepdim=True)[0] - input.min(dim=self.dim, keepdim=True)[0] 48 | # normalize 49 | normalized_input = (input - mean) / (var + eps) 50 | # apply weight and bias 51 | output = normalized_input * self.weight + self.bias 52 | 53 | return output 54 | 55 | 56 | class VarNorm(_CostVolumeNorm): 57 | def __init__(self, dim=1, affine=True, weight=1, bias=0): 58 | super(VarNorm, self).__init__(dim=dim, affine=affine, weight=weight, bias=bias) 59 | 60 | def forward(self, input): 61 | # compute mean value 62 | mean = input.mean(dim=self.dim, keepdim=True) 63 | # compute var value 64 | var = input.var(dim=self.dim, keepdim=True) 65 | # normalize 66 | normalized_input = (input - mean).abs() / (var + eps) 67 | # apply weight and bias 68 | output = normalized_input * self.weight + self.bias 69 | 70 | return output 71 | 72 | 73 | class StdNorm(_CostVolumeNorm): 74 | def __init__(self, dim=1, affine=True, weight=1, bias=0): 75 | super(StdNorm, self).__init__(dim=dim, affine=affine, weight=weight, bias=bias) 76 | 77 | def forward(self, input): 78 | # compute mean value 79 | mean = input.mean(dim=self.dim, keepdim=True) 80 | # compute std value 81 | var = input.std(dim=self.dim, keepdim=True) 82 | # normalize 83 | normalized_input = (input - mean).abs() / (var + eps) 84 | # apply weight and bias 85 | output = normalized_input * self.weight + self.bias 86 | 87 | return output 88 | 89 |
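# NOTE (editor): a quick sanity-check sketch for RangeNorm above (the tensor
# shape is an assumption): with the default weight=1, bias=0 it maps each
# pixel's costs along the disparity dimension into roughly [0, 1]:
#
#     cost = torch.randn(2, 48, 64, 128)        # [B, D, H, W]
#     out = RangeNorm(dim=1, affine=False)(cost)
#     # per pixel: (cost - min) / (max - min + eps), so min -> 0, max -> ~1
#
90 | class SigmoidNorm(_CostVolumeNorm): 91 | def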
__init__(self, dim=1, affine=True, weight=1, bias=0): 92 | super(SigmoidNorm, self).__init__(dim=dim, affine=affine, weight=weight, bias=bias) 93 | 94 | def forward(self, input): 95 | # normalize 96 | normalized_input = torch.sigmoid(input) 97 | # apply weight and bias 98 | output = normalized_input * self.weight + self.bias 99 | 100 | return output 101 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/utils/dif_fms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | from dmb.modeling.stereo.layers.inverse_warp_3d import inverse_warp_3d 5 | 6 | 7 | def dif_fms(reference_fm, target_fm, max_disp=192, start_disp=0, dilation=1, disp_sample=None, 8 | normalize=False, p=1.0): 9 | """ 10 | Concat left and right in Channel dimension to form the raw cost volume. 11 | Args: 12 | max_disp, (int): under the scale of feature used, 13 | often equals to (end disp - start disp + 1), the maximum searching range of disparity 14 | start_disp (int): the start searching disparity index, usually be 0 15 | dilation (int): the step between near disparity index 16 | dilation (int): the step between near disparity index 17 | 18 | Inputs: 19 | reference_fm, (Tensor): reference feature, i.e. left image feature, in [BatchSize, Channel, Height, Width] layout 20 | target_fm, (Tensor): target feature, i.e. right image feature, in [BatchSize, Channel, Height, Width] layout 21 | 22 | Output: 23 | dif_fm, (Tensor): the formed cost volume, in [BatchSize, Channel, disp_sample_number, Height, Width] layout 24 | 25 | """ 26 | device = reference_fm.device 27 | N, C, H, W = reference_fm.shape 28 | 29 | end_disp = start_disp + max_disp - 1 30 | disp_sample_number = (max_disp + dilation - 1) // dilation 31 | disp_index = torch.linspace(start_disp, end_disp, disp_sample_number) 32 | 33 | dif_fm = torch.zeros(N, C, disp_sample_number, H, W).to(device) 34 | idx = 0 35 | for i in disp_index: 36 | i = int(i) # convert torch.Tensor to int, so that it can be index 37 | if i > 0: 38 | dif_fm[:, :, idx, :, i:] = reference_fm[:, :, :, i:] - target_fm[:, :, :, :-i] 39 | elif i == 0: 40 | dif_fm[:, :, idx, :, :] = reference_fm - target_fm 41 | else: 42 | dif_fm[:, :, idx, :, :i] = reference_fm[:, :, :, :i] - target_fm[:, :, :, abs(i):] 43 | idx = idx + 1 44 | 45 | dif_fm = dif_fm.contiguous() 46 | return dif_fm 47 | 48 | 49 | def fast_dif_fms(reference_fm, target_fm, max_disp=192, start_disp=0, dilation=1, disp_sample=None, 50 | normalize=False, p=1.0,): 51 | device = reference_fm.device 52 | B, C, H, W = reference_fm.shape 53 | 54 | if disp_sample is None: 55 | end_disp = start_disp + max_disp - 1 56 | 57 | disp_sample_number = (max_disp + dilation - 1) // dilation 58 | D = disp_sample_number 59 | 60 | # generate disparity samples, in [B,D, H, W] layout 61 | disp_sample = torch.linspace(start_disp, end_disp, D) 62 | disp_sample = disp_sample.view(1, D, 1, 1).expand(B, D, H, W).to(device).float() 63 | 64 | else: # direct provide disparity samples 65 | # the number of disparity samples 66 | D = disp_sample.shape[1] 67 | 68 | # expand D dimension 69 | dif_reference_fm = reference_fm.unsqueeze(2).expand(B, C, D, H, W) 70 | dif_target_fm = target_fm.unsqueeze(2).expand(B, C, D, H, W) 71 | 72 | # shift reference feature map with disparity through grid sample 73 | # shift target feature according to disparity samples 74 | dif_target_fm = inverse_warp_3d(dif_target_fm, -disp_sample, 
padding_mode='zeros') 75 | 76 | # mask out features in reference 77 | dif_reference_fm = dif_reference_fm * (dif_target_fm > 0).type_as(dif_reference_fm) 78 | 79 | # [B, C, D, H, W) 80 | dif_fm = dif_reference_fm - dif_target_fm 81 | 82 | if normalize: 83 | # [B, D, H, W] 84 | dif_fm = torch.norm(dif_fm, p=p, dim=1, keepdim=False) 85 | 86 | return dif_fm 87 | 88 | 89 | DIF_FUNCS = dict( 90 | default=dif_fms, 91 | fast_mode=fast_dif_fms, 92 | ) 93 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/utils/hourglass.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv3d_bn, conv3d_bn_relu, conv_bn_relu, deconv3d_bn 6 | 7 | 8 | class Hourglass(nn.Module): 9 | """ 10 | An implementation of hourglass module proposed in PSMNet. 11 | Args: 12 | in_planes (int): the channels of raw cost volume 13 | batch_norm (bool): whether use batch normalization layer, 14 | default True 15 | Inputs: 16 | x, (Tensor): cost volume 17 | in [BatchSize, in_planes, MaxDisparity, Height, Width] layout 18 | presqu, (optional, Tensor): cost volume 19 | in [BatchSize, in_planes * 2, MaxDisparity, Height/2, Width/2] layout 20 | postsqu, (optional, Tensor): cost volume 21 | in [BatchSize, in_planes * 2, MaxDisparity, Height/2, Width/2] layout 22 | Outputs: 23 | out, (Tensor): cost volume 24 | in [BatchSize, in_planes, MaxDisparity, Height, Width] layout 25 | pre, (optional, Tensor): cost volume 26 | in [BatchSize, in_planes * 2, MaxDisparity, Height/2, Width/2] layout 27 | post, (optional, Tensor): cost volume 28 | in [BatchSize, in_planes * 2, MaxDisparity, Height/2, Width/2] layout 29 | 30 | """ 31 | def __init__(self, in_planes, batch_norm=True): 32 | super(Hourglass, self).__init__() 33 | self.batch_norm = batch_norm 34 | 35 | self.conv1 = conv3d_bn_relu( 36 | self.batch_norm, in_planes, in_planes * 2, 37 | kernel_size=3, stride=2, padding=1, bias=False 38 | ) 39 | 40 | self.conv2 = conv3d_bn( 41 | self.batch_norm, in_planes * 2, in_planes * 2, 42 | kernel_size=3, stride=1, padding=1, bias=False 43 | ) 44 | 45 | self.conv3 = conv3d_bn_relu( 46 | self.batch_norm, in_planes * 2, in_planes * 2, 47 | kernel_size=3, stride=2, padding=1, bias=False 48 | ) 49 | self.conv4 = conv3d_bn_relu( 50 | self.batch_norm, in_planes * 2, in_planes * 2, 51 | kernel_size=3, stride=1, padding=1, bias=False 52 | ) 53 | self.conv5 = deconv3d_bn( 54 | self.batch_norm, in_planes * 2, in_planes * 2, 55 | kernel_size=3, padding=1, output_padding=1, stride=2, bias=False 56 | ) 57 | self.conv6 = deconv3d_bn( 58 | self.batch_norm, in_planes * 2, in_planes, 59 | kernel_size=3, padding=1, output_padding=1, stride=2, bias=False 60 | ) 61 | 62 | def forward(self, x, presqu=None, postsqu=None): 63 | # in: [B, C, D, H, W], out: [B, 2C, D, H/2, W/2] 64 | out = self.conv1(x) 65 | # in: [B, 2C, D, H/2, W/2], out: [B, 2C, D, H/2, W/2] 66 | pre = self.conv2(out) 67 | if postsqu is not None: 68 | pre = F.relu(pre + postsqu, inplace=True) 69 | else: 70 | pre = F.relu(pre, inplace=True) 71 | 72 | # in: [B, 2C, D, H/2, W/2], out: [B, 2C, D, H/4, W/4] 73 | out = self.conv3(pre) 74 | # in: [B, 2C, D, H/4, W/4], out: [B, 2C, D, H/4, W/4] 75 | out = self.conv4(out) 76 | 77 | # in: [B, 2C, D, H/4, W/4], out: [B, 2C, D, H/2, W/2] 78 | if presqu is not None: 79 | post = F.relu(self.conv5(out) + presqu, inplace=True) 80 | else: 81 | 
post = F.relu(self.conv5(out) + pre, inplace=True) 82 | 83 | # in: [B, 2C, D/2, H/2, W/2], out: [B, C, D, H, W] 84 | out = self.conv6(post) 85 | 86 | return out, pre, post 87 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/utils/hourglass_2d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv_bn, conv_bn_relu, deconv_bn 6 | 7 | 8 | class Hourglass2D(nn.Module): 9 | """ 10 | An implementation of the 2D hourglass module proposed in PSMNet. 11 | Args: 12 | in_planes (int): the channels of raw cost volume 13 | batch_norm (bool): whether use batch normalization layer, 14 | default True 15 | Inputs: 16 | x, (Tensor): cost volume 17 | in [BatchSize, in_planes, Height, Width] layout 18 | presqu, (optional, Tensor): cost volume 19 | in [BatchSize, in_planes * 2, Height/2, Width/2] layout 20 | postsqu, (optional, Tensor): cost volume 21 | in [BatchSize, in_planes * 2, Height/2, Width/2] layout 22 | Outputs: 23 | out, (Tensor): cost volume 24 | in [BatchSize, in_planes, Height, Width] layout 25 | pre, (optional, Tensor): cost volume 26 | in [BatchSize, in_planes * 2, Height/2, Width/2] layout 27 | post, (optional, Tensor): cost volume 28 | in [BatchSize, in_planes * 2, Height/2, Width/2] layout 29 | 30 | """ 31 | def __init__(self, in_planes, batch_norm=True): 32 | super(Hourglass2D, self).__init__() 33 | self.batch_norm = batch_norm 34 | 35 | self.conv1 = conv_bn_relu( 36 | self.batch_norm, in_planes, in_planes * 2, 37 | kernel_size=3, stride=2, padding=1, bias=False 38 | ) 39 | 40 | self.conv2 = conv_bn( 41 | self.batch_norm, in_planes * 2, in_planes * 2, 42 | kernel_size=3, stride=1, padding=1, bias=False 43 | ) 44 | 45 | self.conv3 = conv_bn_relu( 46 | self.batch_norm, in_planes * 2, in_planes * 2, 47 | kernel_size=3, stride=2, padding=1, bias=False 48 | ) 49 | self.conv4 = conv_bn_relu( 50 | self.batch_norm, in_planes * 2, in_planes * 2, 51 | kernel_size=3, stride=1, padding=1, bias=False 52 | ) 53 | self.conv5 = deconv_bn( 54 | self.batch_norm, in_planes * 2, in_planes * 2, 55 | kernel_size=3, padding=1, output_padding=1, stride=2, bias=False 56 | ) 57 | self.conv6 = deconv_bn( 58 | self.batch_norm, in_planes * 2, in_planes, 59 | kernel_size=3, padding=1, output_padding=1, stride=2, bias=False 60 | ) 61 | 62 | def forward(self, x, presqu=None, postsqu=None): 63 | # in: [B, C, H, W], out: [B, 2C, H/2, W/2] 64 | out = self.conv1(x) 65 | # in: [B, 2C, H/2, W/2], out: [B, 2C, H/2, W/2] 66 | pre = self.conv2(out) 67 | if postsqu is not None: 68 | pre = F.relu(pre + postsqu, inplace=True) 69 | else: 70 | pre = F.relu(pre, inplace=True) 71 | 72 | # in: [B, 2C, H/2, W/2], out: [B, 2C, H/4, W/4] 73 | out = self.conv3(pre) 74 | # in: [B, 2C, H/4, W/4], out: [B, 2C, H/4, W/4] 75 | out = self.conv4(out) 76 | 77 | # in: [B, 2C, H/4, W/4], out: [B, 2C, H/2, W/2] 78 | if presqu is not None: 79 | post = F.relu(self.conv5(out) + presqu, inplace=True) 80 | else: 81 | post = F.relu(self.conv5(out) + pre, inplace=True) 82 | 83 | # in: [B, 2C, H/2, W/2], out: [B, C, H, W] 84 | out = self.conv6(post) 85 | 86 | return out, pre, post 87 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/cost_processors/utils/hw_hourglass.py: -------------------------------------------------------------------------------- 1 |
import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv3d_bn, conv3d_bn_relu, conv_bn_relu, deconv3d_bn 6 | 7 | 8 | class HWHourglass(nn.Module): 9 | """ 10 | An implementation of the hourglass module proposed in DeepPruner. 11 | Although the input is a 3D cost volume, the stride is only imposed on the height and width dimensions. 12 | 13 | Args: 14 | in_planes (int): the channels of raw cost volume 15 | batch_norm (bool): whether use batch normalization layer, 16 | default True 17 | 18 | Inputs: 19 | raw_cost, (Tensor): raw cost volume 20 | in [BatchSize, in_planes, MaxDisparity, Height, Width] layout 21 | 22 | Outputs: 23 | cost, (Tensor): processed cost volume 24 | in [BatchSize, in_planes, MaxDisparity, Height, Width] layout 25 | 26 | """ 27 | def __init__(self, in_planes, batch_norm=True): 28 | super(HWHourglass, self).__init__() 29 | self.batch_norm = batch_norm 30 | 31 | self.conv1_a = conv3d_bn_relu( 32 | self.batch_norm, in_planes, in_planes * 2, 33 | kernel_size=3, stride=(1, 2, 2), padding=1, bias=False 34 | ) 35 | 36 | self.conv1_b = conv3d_bn_relu( 37 | self.batch_norm, in_planes * 2, in_planes * 2, 38 | kernel_size=3, stride=(1, 1, 1), padding=1, bias=False 39 | ) 40 | 41 | self.conv1_d = deconv3d_bn( 42 | self.batch_norm, in_planes * 2, in_planes, 43 | kernel_size=3, padding=1, output_padding=(0, 1, 1), stride=(1, 2, 2), bias=False 44 | ) 45 | 46 | self.conv2_a = conv3d_bn_relu( 47 | self.batch_norm, in_planes * 2, in_planes * 4, 48 | kernel_size=3, stride=(1, 2, 2), padding=1, bias=False 49 | ) 50 | 51 | self.conv2_b = conv3d_bn_relu( 52 | self.batch_norm, in_planes * 4, in_planes * 4, 53 | kernel_size=3, stride=(1, 1, 1), padding=1, bias=False 54 | ) 55 | 56 | self.conv2_d = deconv3d_bn( 57 | self.batch_norm, in_planes * 4, in_planes * 2, 58 | kernel_size=3, padding=1, output_padding=(0, 1, 1), stride=(1, 2, 2), bias=False 59 | ) 60 | 61 | self.conv3_a = conv3d_bn_relu( 62 | self.batch_norm, in_planes * 4, in_planes * 8, 63 | kernel_size=3, stride=(1, 2, 2), padding=1, bias=False 64 | ) 65 | 66 | self.conv3_b = conv3d_bn_relu( 67 | self.batch_norm, in_planes * 8, in_planes * 8, 68 | kernel_size=3, stride=(1, 1, 1), padding=1, bias=False 69 | ) 70 | 71 | self.conv3_d = deconv3d_bn( 72 | self.batch_norm, in_planes * 8, in_planes * 4, 73 | kernel_size=3, padding=1, output_padding=(0, 1, 1), stride=(1, 2, 2), bias=False 74 | ) 75 | 76 | 77 | def forward(self, raw_cost): 78 | # in: [B, C, D, H, W], out: [B, 2C, D, H/2, W/2] 79 | out1_a = self.conv1_a(raw_cost) 80 | 81 | # in: [B, 2C, D, H/2, W/2], out: [B, 2C, D, H/2, W/2] 82 | out1_b = self.conv1_b(out1_a) + out1_a 83 | 84 | # in: [B, 2C, D, H/2, W/2], out: [B, 4C, D, H/4, W/4] 85 | out2_a = self.conv2_a(out1_b) 86 | 87 | # in: [B, 4C, D, H/4, W/4], out: [B, 4C, D, H/4, W/4] 88 | out2_b = self.conv2_b(out2_a) + out2_a 89 | 90 | # in: [B, 4C, D, H/4, W/4], out: [B, 8C, D, H/8, W/8] 91 | out3_a = self.conv3_a(out2_b) 92 | 93 | # in: [B, 8C, D, H/8, W/8], out: [B, 8C, D, H/8, W/8] 94 | out3_b = self.conv3_b(out3_a) + out3_a 95 | 96 | # in: [B, 8C, D, H/8, W/8], out: [B, 4C, D, H/4, W/4] 97 | cost = self.conv3_d(out3_b) + out2_b 98 | 99 | # in: [B, 4C, D, H/4, W/4], out: [B, 2C, D, H/2, W/2] 100 | cost = self.conv2_d(cost) + out1_b 101 | 102 | # in: [B, 2C, D, H/2, W/2], out: [B, C, D, H, W] 103 | cost = self.conv1_d(cost) 104 | 105 | return cost 106 | --------------------------------------------------------------------------------
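# A minimal usage sketch for HWHourglass; the sizes below are arbitrary
# examples (not values used by DeepPruner), and H and W must be divisible
# by 8 so the three stride-(1, 2, 2) stages invert cleanly.
import torch
from dmb.modeling.stereo.cost_processors.utils.hw_hourglass import HWHourglass

hw_hourglass = HWHourglass(in_planes=8, batch_norm=True)
# dummy raw cost volume in [B, C, D, H, W] layout
raw_cost = torch.rand(1, 8, 48, 64, 128)
# aggregation runs at (H/2, W/2), (H/4, W/4), (H/8, W/8) while D is untouched,
# and the module restores the input layout
cost = hw_hourglass(raw_cost)
assert cost.shape == raw_cost.shape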
/dmb/modeling/stereo/disp_predictors/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_disp_predictor 2 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_predictors/builder.py: -------------------------------------------------------------------------------- 1 | from .faster_soft_argmin import FasterSoftArgmin 2 | from .local_soft_argmin import LocalSoftArgmin 3 | from .soft_argmin import SoftArgmin 4 | 5 | PREDICTORS = { 6 | 'DEFAULT': SoftArgmin, 7 | 'FASTER': FasterSoftArgmin, 8 | 'LOCAL': LocalSoftArgmin, 9 | } 10 | 11 | 12 | def build_disp_predictor(cfg): 13 | pred_type = cfg.model.disp_predictor.get('type', 'FASTER') 14 | 15 | assert pred_type in PREDICTORS, 'disparity predictor type not found, expected: {}, ' \ 16 | 'but got {}'.format(PREDICTORS.keys(), pred_type) 17 | 18 | default_args = cfg.model.disp_predictor.copy() 19 | default_args.pop('type') 20 | 21 | disp_predictor = PREDICTORS[pred_type](**default_args) 22 | 23 | return disp_predictor 24 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_predictors/faster_soft_argmin.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class FasterSoftArgmin(nn.Module): 7 | """ 8 | A faster implementation of soft argmin. 9 | For details, refer to dmb.modeling.stereo.disp_predictors.soft_argmin 10 | Args: 11 | max_disp, (int): under the scale of feature used, 12 | often equal to (end disp - start disp + 1), the maximum searching range of disparity 13 | start_disp (int): the start searching disparity index, usually 0 14 | dilation (optional, int): the step between nearby disparity indices 15 | alpha (float or int): a scaling factor multiplied with cost_volume 16 | for details, refer to: https://bouthilx.wordpress.com/2013/04/21/a-soft-argmax/ 17 | normalize (bool): whether apply softmax on cost_volume, default True 18 | 19 | Inputs: 20 | cost_volume (Tensor): the matching cost after regularization, 21 | in [BatchSize, disp_sample_number, Height, Width] layout 22 | disp_sample (optional, Tensor): the estimated disparity samples, 23 | in [BatchSize, disp_sample_number, Height, Width] layout. NOT USED!
24 | Returns: 25 | disp_map (Tensor): a disparity map regressed from cost volume, 26 | in [BatchSize, 1, Height, Width] layout 27 | """ 28 | 29 | def __init__(self, max_disp, start_disp=0, dilation=1, alpha=1.0, normalize=True): 30 | super(FasterSoftArgmin, self).__init__() 31 | self.max_disp = max_disp 32 | self.start_disp = start_disp 33 | self.dilation = dilation 34 | self.end_disp = start_disp + max_disp - 1 35 | self.disp_sample_number = (max_disp + dilation - 1) // dilation 36 | 37 | self.alpha = alpha 38 | self.normalize = normalize 39 | 40 | # compute disparity index: (1, 1, disp_sample_number, 1, 1) 41 | disp_sample = torch.linspace( 42 | self.start_disp, self.end_disp, self.disp_sample_number 43 | ) 44 | disp_sample = disp_sample.repeat(1, 1, 1, 1, 1).permute(0, 1, 4, 2, 3).contiguous() 45 | 46 | self.disp_regression = nn.Conv3d(1, 1, (self.disp_sample_number, 1, 1), 1, 0, bias=False) 47 | 48 | self.disp_regression.weight.data = disp_sample 49 | self.disp_regression.weight.requires_grad = False 50 | 51 | def forward(self, cost_volume, disp_sample=None): 52 | 53 | # note: the cost volume directly represents similarity 54 | # using 'c' or '-c' does not affect performance, because the feature-based cost volume provides flexibility. 55 | 56 | if cost_volume.dim() != 4: 57 | raise ValueError('expected 4D input (got {}D input)' 58 | .format(cost_volume.dim())) 59 | 60 | # scale cost volume with alpha 61 | cost_volume = cost_volume * self.alpha 62 | 63 | if self.normalize: 64 | prob_volume = F.softmax(cost_volume, dim=1) 65 | else: 66 | prob_volume = cost_volume 67 | 68 | # [B, disp_sample_number, H, W] -> [B, 1, disp_sample_number, H, W] 69 | prob_volume = prob_volume.unsqueeze(1) 70 | 71 | disp_map = self.disp_regression(prob_volume) 72 | # [B, 1, 1, H, W] -> [B, 1, H, W] 73 | disp_map = disp_map.squeeze(1) 74 | 75 | return disp_map 76 | 77 | def __repr__(self): 78 | repr_str = '{}\n'.format(self.__class__.__name__) 79 | repr_str += ' ' * 4 + 'Max Disparity: {}\n'.format(self.max_disp) 80 | repr_str += ' ' * 4 + 'Start disparity: {}\n'.format(self.start_disp) 81 | repr_str += ' ' * 4 + 'Dilation rate: {}\n'.format(self.dilation) 82 | repr_str += ' ' * 4 + 'Alpha: {}\n'.format(self.alpha) 83 | repr_str += ' ' * 4 + 'Normalize: {}\n'.format(self.normalize) 84 | 85 | return repr_str 86 | 87 | @property 88 | def name(self): 89 | return 'FasterSoftArgmin' 90 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_predictors/soft_argmin.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class SoftArgmin(nn.Module): 6 | """ 7 | An implementation of soft argmin.
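Concretely, disp_map = sum_d (d * softmax(alpha * cost_volume, dim=1)[d]), i.e. the expectation of disparity under the (optionally softmax-normalized) matching probability distribution.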
8 | Args: 9 | max_disp, (int): under the scale of feature used, 10 | often equal to (end disp - start disp + 1), the maximum searching range of disparity 11 | start_disp (int): the start searching disparity index, usually 0 12 | dilation (optional, int): the step between nearby disparity indices 13 | alpha (float or int): a scaling factor multiplied with cost_volume 14 | for details, refer to: https://bouthilx.wordpress.com/2013/04/21/a-soft-argmax/ 15 | normalize (bool): whether apply softmax on cost_volume, default True 16 | 17 | Inputs: 18 | cost_volume (Tensor): the matching cost after regularization, 19 | in [BatchSize, disp_sample_number, Height, Width] layout 20 | disp_sample (optional, Tensor): the estimated disparity samples, 21 | in [BatchSize, disp_sample_number, Height, Width] layout 22 | 23 | Returns: 24 | disp_map (Tensor): a disparity map regressed from cost volume, 25 | in [BatchSize, 1, Height, Width] layout 26 | """ 27 | 28 | def __init__(self, max_disp=192, start_disp=0, dilation=1, alpha=1.0, normalize=True): 29 | super(SoftArgmin, self).__init__() 30 | self.max_disp = max_disp 31 | self.start_disp = start_disp 32 | self.dilation = dilation 33 | self.end_disp = start_disp + max_disp - 1 34 | self.disp_sample_number = (max_disp + dilation - 1) // dilation 35 | 36 | self.alpha = alpha 37 | self.normalize = normalize 38 | 39 | # generate disparity sample, in [disp_sample_number,] layout 40 | self.disp_sample = torch.linspace( 41 | self.start_disp, self.end_disp, self.disp_sample_number 42 | ) 43 | 44 | def forward(self, cost_volume, disp_sample=None): 45 | 46 | # note: the cost volume directly represents similarity 47 | # using 'c' or '-c' does not affect performance, because the feature-based cost volume provides flexibility. 48 | 49 | if cost_volume.dim() != 4: 50 | raise ValueError('expected 4D input (got {}D input)' 51 | .format(cost_volume.dim())) 52 | 53 | # scale cost volume with alpha 54 | cost_volume = cost_volume * self.alpha 55 | 56 | if self.normalize: 57 | prob_volume = F.softmax(cost_volume, dim=1) 58 | else: 59 | prob_volume = cost_volume 60 | 61 | B, D, H, W = cost_volume.shape 62 | 63 | if disp_sample is None: 64 | assert D == self.disp_sample_number, 'The number of disparity samples should be' \ 65 | ' consistent!' 66 | disp_sample = self.disp_sample.repeat(B, H, W, 1).permute(0, 3, 1, 2).contiguous() 67 | disp_sample = disp_sample.to(cost_volume.device) 68 | 69 | else: 70 | assert D == disp_sample.shape[1], 'The number of disparity samples should be' \ 71 | ' consistent!'
72 | # compute disparity: (BatchSize, 1, Height, Width) 73 | disp_map = torch.sum(prob_volume * disp_sample, dim=1, keepdim=True) 74 | 75 | return disp_map 76 | 77 | def __repr__(self): 78 | repr_str = '{}\n'.format(self.__class__.__name__) 79 | repr_str += ' ' * 4 + 'Max Disparity: {}\n'.format(self.max_disp) 80 | repr_str += ' ' * 4 + 'Start disparity: {}\n'.format(self.start_disp) 81 | repr_str += ' ' * 4 + 'Dilation rate: {}\n'.format(self.dilation) 82 | repr_str += ' ' * 4 + 'Alpha: {}\n'.format(self.alpha) 83 | repr_str += ' ' * 4 + 'Normalize: {}\n'.format(self.normalize) 84 | 85 | return repr_str 86 | 87 | @property 88 | def name(self): 89 | return 'SoftArgmin' 90 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_refinement/AnyNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv_bn_relu 6 | from dmb.ops import GateRecurrent2dnoind 7 | 8 | class AnyNetRefinement(nn.Module): 9 | """ 10 | 11 | The disparity refinement module proposed in AnyNet. 12 | 13 | Args: 14 | in_planes (int): the channels of input 15 | spn_planes (int): the channels used for spn 16 | batch_norm (bool): whether use batch normalization layer, default True 17 | 18 | Inputs: 19 | disps (list of Tensor): estimated disparity maps, in [BatchSize, 1, Height//s, Width//s] layout 20 | left (Tensor): left image feature, in [BatchSize, Channels, Height, Width] layout 21 | right (Tensor): right image feature, in [BatchSize, Channels, Height, Width] layout 22 | leftImage (Tensor): left image, in [BatchSize, 3, Height, Width] layout 23 | rightImage (Tensor): right image, in [BatchSize, 3, Height, Width] layout 24 | 25 | Outputs: 26 | refine_disps (list of Tensor): refined disparity maps, in [BatchSize, 1, Height, Width] layout 27 | 28 | 29 | """ 30 | def __init__(self, in_planes, spn_planes=8, batch_norm=True): 31 | super(AnyNetRefinement, self).__init__() 32 | self.in_planes = in_planes 33 | self.spn_planes = spn_planes 34 | self.batch_norm = batch_norm 35 | 36 | self.img_conv = nn.Sequential( 37 | conv_bn_relu(batch_norm, in_planes, spn_planes * 2, 38 | kernel_size=3, stride=1, padding=1, dilation=1, bias=False), 39 | conv_bn_relu(batch_norm, spn_planes * 2, spn_planes * 2, 40 | kernel_size=3, stride=1, padding=1, dilation=1, bias=False), 41 | conv_bn_relu(batch_norm, spn_planes * 2, spn_planes * 2, 42 | kernel_size=3, stride=1, padding=1, dilation=1, bias=False), 43 | nn.Conv2d(spn_planes * 2, spn_planes * 3, 44 | kernel_size=3, stride=1, padding=1, dilation=1, bias=False), 45 | ) 46 | 47 | self.disp_conv = nn.Conv2d(1, spn_planes, kernel_size=3, 48 | stride=1, padding=1, dilation=1, bias=False) 49 | 50 | self.classify = nn.Conv2d(spn_planes, 1, kernel_size=3, 51 | stride=1, padding=1, dilation=1, bias=False) 52 | 53 | # left->right propagation 54 | self.spn = GateRecurrent2dnoind(True, False) 55 | 56 | 57 | def forward(self, disps, left, right, leftImage, rightImage): 58 | # only the disparity map from the last stage needs to be refined 59 | init_disp = disps[-1] 60 | 61 | # down-sample the left image to the resolution of disparity map 62 | h, w = init_disp.shape[-2:] 63 | leftImage = F.interpolate(leftImage, size=(h, w), mode='bilinear', align_corners=False) 64 | 65 | # extract guidance information from left image 66 | # [B, spn_planes*3, H, W] 67 | G = self.img_conv(leftImage) 68 | 69 | # G1~G3:
three coefficient maps (e.g., left-top, left-center, left-bottom) 70 | # [B, spn_planes, H, W] 71 | G1, G2, G3 = torch.split(G, self.spn_planes, dim=1) 72 | 73 | # for any pixel i, |G1(i)| + |G2(i)| + |G3(i)| <= 1 is a sufficient condition for model stability 74 | # [B, spn_planes, H, W] 75 | sum_abs = G1.abs() + G2.abs() + G3.abs() 76 | G1 = torch.div(G1, sum_abs + 1e-8) 77 | G2 = torch.div(G2, sum_abs + 1e-8) 78 | G3 = torch.div(G3, sum_abs + 1e-8) 79 | 80 | # [B, spn_planes, H, W] 81 | disp_feat = self.disp_conv(init_disp) 82 | 83 | # [B, spn_planes, H, W] 84 | propagated_disp_feat = self.spn(disp_feat, G1, G2, G3) 85 | 86 | # [B, 1, H, W] 87 | res_disp = self.classify(propagated_disp_feat) 88 | 89 | # [B, 1, H, W] 90 | refine_disp = F.relu(res_disp + init_disp) 91 | 92 | disps.append(refine_disp) 93 | # In this framework, we always keep the better disparity map ahead of the worse one. 94 | disps.reverse() 95 | 96 | return disps 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_refinement/DeepPruner.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv_bn, conv_bn_relu 6 | 7 | 8 | class RefinementHead(nn.Module): 9 | """ 10 | Args: 11 | in_planes (int): the channels of input 12 | batch_norm (bool): whether use batch normalization layer, default True 13 | 14 | Inputs: 15 | init_disp (Tensor): estimated disparity map, in [BatchSize, 1, Height, Width] layout 16 | input (Tensor): feature used to guide refinement, in [BatchSize, in_planes, Height, Width] layout 17 | 18 | Outputs: 19 | refine_disp (Tensor): refined disparity map, in [BatchSize, 1, Height, Width] layout 20 | 21 | """ 22 | def __init__(self, in_planes, batch_norm=True): 23 | super(RefinementHead, self).__init__() 24 | self.in_planes = in_planes 25 | self.batch_norm = batch_norm 26 | 27 | self.conv = nn.Sequential( 28 | conv_bn_relu(batch_norm, in_planes, 32, kernel_size=3, stride=1, padding=1, bias=False), 29 | conv_bn_relu(batch_norm, 32, 32, kernel_size=3, stride=1, padding=1, dilation=1, bias=False), 30 | conv_bn_relu(batch_norm, 32, 32, kernel_size=3, stride=1, padding=1, dilation=1, bias=False), 31 | conv_bn_relu(batch_norm, 32, 16, kernel_size=3, stride=1, padding=2, dilation=2, bias=False), 32 | conv_bn_relu(batch_norm, 16, 16, kernel_size=3, stride=1, padding=4, dilation=4, bias=False), 33 | conv_bn_relu(batch_norm, 16, 16, kernel_size=3, stride=1, padding=1, dilation=1, bias=False) 34 | ) 35 | 36 | self.classify = nn.Conv2d(16, 1, kernel_size=3, padding=1, stride=1, bias=False) 37 | 38 | def forward(self, init_disp, input): 39 | 40 | res_disp = self.classify(self.conv(input)) 41 | 42 | refine_disp = F.relu(res_disp + init_disp) 43 | 44 | return refine_disp 45 | 46 | 47 | class DeepPrunerRefinement(nn.Module): 48 | """ 49 | The disparity refinement module proposed in DeepPruner.
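Each refinement stage concatenates the disparity map of the previous stage with a guide feature map, predicts a residual disparity, and bilinearly up-samples the result by 2 (doubling the disparity values to match the new resolution).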
50 | 51 | Args: 52 | in_planes_list (list, tuple): the channels of input of each refinement sub-network 53 | batch_norm (bool): whether use batch normalization layer, default True 54 | num (int): the number of cascaded refinement sub-networks, default 1 55 | 56 | Inputs: 57 | disps (list of Tensor): estimated disparity maps, in [BatchSize, 1, Height, Width] layout 58 | input (Tensor): feature used to guide refinement, in [BatchSize, in_planes, Height, Width] layout 59 | 60 | Outputs: 61 | refine_disps (list of Tensor): refined disparity maps, in [BatchSize, 1, Height, Width] layout 62 | 63 | """ 64 | 65 | def __init__(self, in_planes_list, batch_norm=True, num=1): 66 | super(DeepPrunerRefinement, self).__init__() 67 | self.in_planes_list = in_planes_list 68 | self.batch_norm = batch_norm 69 | self.num = num 70 | 71 | # cascade the refinement sub-networks 72 | self.refine_blocks = nn.ModuleList([ 73 | RefinementHead(self.in_planes_list[i], self.batch_norm) for i in range(self.num) 74 | ]) 75 | 76 | 77 | def forward(self, disps, low_ref_group_fms): 78 | 79 | for i in range(self.num): 80 | # get last stage disparity map 81 | init_disp = disps[-1] 82 | # concatenate last stage disparity map into guide feature map 83 | guide_fms = torch.cat((low_ref_group_fms[i], init_disp), dim=1) 84 | # residual refinement 85 | refine_disp = self.refine_blocks[i](init_disp, guide_fms) 86 | # up-sample the refined disparity map; disparity values are doubled to match the 2x spatial up-sampling 87 | refine_disp = F.interpolate(refine_disp * 2, scale_factor=(2, 2), mode='bilinear', align_corners=False) 88 | 89 | disps.append(refine_disp) 90 | 91 | # In this framework, we always keep the better disparity map ahead of the worse one. 92 | disps.reverse() 93 | 94 | return disps 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_refinement/StereoNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .utils.edge_aware import EdgeAwareRefinement 6 | 7 | class StereoNetRefinement(nn.Module): 8 | """ 9 | The disparity refinement module proposed in StereoNet.
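The coarse disparity map is first bilinearly up-sampled to the full image resolution (with disparity values scaled by the up-sampling factor), then refined by a cascade of edge-aware refinement blocks guided by the left image.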
10 | 11 | Args: 12 | in_planes (int): the channels of input 13 | batch_norm (bool): whether use batch normalization layer, default True 14 | num (int): the number of edge aware refinement modules 15 | 16 | Inputs: 17 | disps (list of Tensor): estimated disparity maps, in [BatchSize, 1, Height//s, Width//s] layout 18 | left (Tensor): left image feature, in [BatchSize, Channels, Height, Width] layout 19 | right (Tensor): right image feature, in [BatchSize, Channels, Height, Width] layout 20 | leftImage (Tensor): left image, in [BatchSize, 3, Height, Width] layout 21 | rightImage (Tensor): right image, in [BatchSize, 3, Height, Width] layout 22 | 23 | Outputs: 24 | refine_disps (list of Tensor): refined disparity maps, in [BatchSize, 1, Height, Width] layout 25 | 26 | """ 27 | 28 | def __init__(self, in_planes, batch_norm=True, num=1): 29 | super(StereoNetRefinement, self).__init__() 30 | self.in_planes = in_planes 31 | self.batch_norm = batch_norm 32 | self.num = num 33 | 34 | # cascade the edge aware refinement modules 35 | self.refine_blocks = nn.ModuleList([ 36 | EdgeAwareRefinement(self.in_planes, self.batch_norm) for _ in range(self.num) 37 | ]) 38 | 39 | def forward(self, disps, left, right, leftImage, rightImage): 40 | # only one estimated disparity map in StereoNet 41 | init_disp = disps[-1] 42 | 43 | # Upsample the coarse disparity map to the full resolution 44 | h, w = leftImage.shape[-2:] 45 | 46 | # the scale of downsample 47 | scale = w / init_disp.shape[-1] 48 | 49 | # upsample disparity map to image size, in [BatchSize, 1, Height, Width] 50 | init_disp = F.interpolate(init_disp, size=(h, w), mode='bilinear', align_corners=False) 51 | init_disp = init_disp * scale 52 | 53 | # cascade and refine the previous disparity map 54 | refine_disps = [init_disp] 55 | for block in self.refine_blocks: 56 | refine_disps.append(block(refine_disps[-1], leftImage)) 57 | 58 | # In this framework, we always keep the better disparity map ahead of the worse one.
59 | refine_disps.reverse() 60 | 61 | return refine_disps 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_refinement/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_disp_refinement -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_refinement/builder.py: -------------------------------------------------------------------------------- 1 | from .StereoNet import StereoNetRefinement 2 | from .DeepPruner import DeepPrunerRefinement 3 | from .AnyNet import AnyNetRefinement 4 | 5 | REFINEMENTS = { 6 | "StereoNet": StereoNetRefinement, 7 | "DeepPruner": DeepPrunerRefinement, 8 | "AnyNet": AnyNetRefinement, 9 | } 10 | 11 | 12 | def build_disp_refinement(cfg): 13 | refine_type = cfg.model.disp_refinement.type 14 | assert refine_type in REFINEMENTS, "disp refinement type not found, expected: {}, " \ 15 | "but got {}".format(REFINEMENTS.keys(), refine_type) 16 | 17 | default_args = cfg.model.disp_refinement.copy() 18 | default_args.pop('type') 19 | default_args.update(batch_norm=cfg.model.batch_norm) 20 | 21 | refinement = REFINEMENTS[refine_type](**default_args) 22 | 23 | return refinement 24 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_refinement/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/dmb/modeling/stereo/disp_refinement/utils/__init__.py -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_refinement/utils/edge_aware.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv_bn, conv_bn_relu, BasicBlock 6 | 7 | 8 | class EdgeAwareRefinement(nn.Module): 9 | """ 10 | The edge aware refinement module proposed in StereoNet.
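The up-sampled disparity map is concatenated with the left image, passed through a stack of dilated residual blocks (dilation rates 1, 2, 4, 8, 1, 1), and the predicted residual disparity is added back to the up-sampled map.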
11 | Args: 12 | in_planes (int): the channels of input 13 | batch_norm (bool): whether use batch normalization layer, default True 14 | 15 | Inputs: 16 | disp (Tensor): estimated disparity map, in [BatchSize, 1, Height//s, Width//s] layout 17 | leftImage (Tensor): left image, in [BatchSize, Channels, Height, Width] layout 18 | 19 | Outputs: 20 | refine_disp (Tensor): refined disparity map, in [BatchSize, 1, Height, Width] layout 21 | """ 22 | 23 | def __init__(self, in_planes, batch_norm=True): 24 | super(EdgeAwareRefinement, self).__init__() 25 | 26 | self.in_planes = in_planes 27 | self.batch_norm = batch_norm 28 | 29 | self.conv_mix = conv_bn_relu(self.batch_norm, self.in_planes, 32, 30 | kernel_size=3, stride=1, padding=1, dilation=1, bias=True) 31 | 32 | # Dilated residual module 33 | self.residual_dilation_blocks = nn.ModuleList() 34 | self.dilation_list = [1, 2, 4, 8, 1, 1] 35 | for dilation in self.dilation_list: 36 | self.residual_dilation_blocks.append( 37 | BasicBlock(self.batch_norm, 32, 32, stride=1, downsample=None, 38 | padding=1, dilation=dilation) 39 | ) 40 | 41 | self.conv_res = nn.Conv2d(32, 1, kernel_size=3, stride=1, padding=1, bias=True) 42 | 43 | def forward(self, disp, leftImage): 44 | h, w = leftImage.shape[-2:] 45 | 46 | # the scale of downsample 47 | scale = w / disp.shape[-1] 48 | 49 | # upsample disparity map to image size, in [BatchSize, 1, Height, Width] 50 | up_disp = F.interpolate(disp, size=(h, w), mode='bilinear', align_corners=False) 51 | up_disp = up_disp * scale 52 | 53 | # residual refinement 54 | # mix the info inside the disparity map and left image 55 | mix_feat = self.conv_mix(torch.cat((up_disp, leftImage), dim=1)) 56 | 57 | for block in self.residual_dilation_blocks: 58 | mix_feat = block(mix_feat) 59 | 60 | # get residual disparity map, in [BatchSize, 1, Height, Width] 61 | res_disp = self.conv_res(mix_feat) 62 | 63 | # refine the upsampled disparity map, in [BatchSize, 1, Height, Width] 64 | refine_disp = res_disp + up_disp 65 | 66 | # ensure all disparity values are non-negative, in [BatchSize, 1, Height, Width] 67 | refine_disp = F.relu(refine_disp, inplace=True) 68 | 69 | return refine_disp 70 | 71 | 72 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_refinement/utils/min_warp_error.py: -------------------------------------------------------------------------------- 1 | """ 2 | Written by youmi. 3 | Implementation of stacked dilation module. 4 | 5 | FrameWork: PyTorch 6 | """ 7 | 8 | from __future__ import print_function 9 | import torch 10 | import torch.nn as nn 11 | import torch.utils.data 12 | import torch.nn.functional as F 13 | 14 | from dmb.modeling.stereo.layers.inverse_warp import inverse_warp 15 | from dmb.modeling.stereo.layers.basic_layers import conv_bn_relu, BasicBlock, conv_bn, deconv_bn_relu 16 | 17 | 18 | class WarpErrorRefinement(nn.Module): 19 | """ 20 | Minimise the warp error to refine the initial disparity map.
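The right feature map is inverse-warped to the left view with the up-sampled disparity map; the warp error |left - warp(right)| is then concatenated with the features and the disparity map to predict a residual correction.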
21 | Args: 22 | in_planes, (int): the channels of left feature 23 | batch_norm, (bool): whether use batch normalization layer 24 | 25 | Inputs: 26 | disp, (Tensor): the left disparity map, in (BatchSize, 1, Height//s, Width//s) layout 27 | left, (Tensor): the left image feature, in (BatchSize, Channels, Height, Width) layout 28 | right, (Tensor): the right image feature, in (BatchSize, Channels, Height, Width) layout 29 | 30 | Outputs: 31 | refine_disp (Tensor): refined disparity map, in [BatchSize, 1, Height, Width] layout 32 | 33 | """ 34 | 35 | def __init__(self, in_planes, C=16, batch_norm=True): 36 | super(WarpErrorRefinement, self).__init__() 37 | self.in_planes = in_planes 38 | self.batch_norm = batch_norm 39 | self.C = C 40 | 41 | self.conv_mix = conv_bn_relu(batch_norm, in_planes*4 + 1, 2*C, kernel_size=3, stride=1, padding=1, dilation=1, bias=False) 42 | 43 | # Dilated residual module 44 | self.residual_dilation_blocks = nn.ModuleList() 45 | self.dilation_list = [1, 2, 4, 8, 1, 1] 46 | for dilation in self.dilation_list: 47 | self.residual_dilation_blocks.append( 48 | conv_bn_relu(batch_norm, 2*C, 2*C, kernel_size=3, stride=1, 49 | padding=dilation, dilation=dilation, bias=False) 50 | ) 51 | 52 | self.conv_res = nn.Conv2d(2*C, 1, kernel_size=3, stride=1, padding=1, bias=True) 53 | 54 | def forward(self, disp, left, right): 55 | B, C, H, W = left.shape 56 | 57 | # the scale of downsample 58 | scale = W / disp.shape[-1] 59 | 60 | # upsample disparity map to image size, in [BatchSize, 1, Height, Width] 61 | up_disp = F.interpolate(disp, size=(H, W), mode='bilinear', align_corners=True) 62 | up_disp = up_disp * scale 63 | 64 | # calculate warp error 65 | warp_right = inverse_warp(right, -up_disp) 66 | error = torch.abs(left - warp_right) 67 | 68 | # residual refinement 69 | # mix the info inside the up-sampled disparity map, left image, right image and warp error 70 | mix_feat = self.conv_mix(torch.cat((left, right, warp_right, error, up_disp), 1)) 71 | 72 | for block in self.residual_dilation_blocks: 73 | mix_feat = block(mix_feat) 74 | 75 | # get residual disparity map, in [BatchSize, 1, Height, Width] 76 | res_disp = self.conv_res(mix_feat) 77 | 78 | # refine the upsampled disparity map, in [BatchSize, 1, Height, Width] 79 | refine_disp = res_disp + up_disp 80 | 81 | # ensure all disparity values are non-negative, in [BatchSize, 1, Height, Width] 82 | refine_disp = F.relu(refine_disp, inplace=True) 83 | 84 | return refine_disp 85 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_disp_sampler -------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_samplers/builder.py: -------------------------------------------------------------------------------- 1 | from .DeepPruner import DeepPrunerSampler 2 | 3 | SAMPLER = { 4 | "DeepPruner": DeepPrunerSampler, 5 | } 6 | 7 | 8 | def build_disp_sampler(cfg): 9 | sampler_type = cfg.model.disp_sampler.type 10 | assert sampler_type in SAMPLER, "disp_sampler type not found, expected: {}, " \ 11 | "but got {}".format(SAMPLER.keys(), sampler_type) 12 | 13 | default_args = cfg.model.disp_sampler.copy() 14 | default_args.pop('type') 15 | default_args.update(batch_norm=cfg.model.batch_norm) 16 | 17 | sampler = SAMPLER[sampler_type](**default_args) 18 | 19 | return sampler 20 |
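# A minimal, hypothetical sketch of how build_disp_sampler is driven by an
# mmcv-style config (attribute-style access like cfg.model.disp_sampler.type);
# the values below are placeholders, not DeepPrunerSampler's real arguments:
#
#   from mmcv import Config
#
#   cfg = Config(dict(
#       model=dict(
#           batch_norm=True,
#           disp_sampler=dict(
#               type='DeepPruner',
#               # sampler-specific kwargs go here
#           ),
#       ),
#   ))
#   disp_sampler = build_disp_sampler(cfg)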
-------------------------------------------------------------------------------- /dmb/modeling/stereo/disp_samplers/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/dmb/modeling/stereo/disp_samplers/utils/__init__.py -------------------------------------------------------------------------------- /dmb/modeling/stereo/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/dmb/modeling/stereo/layers/__init__.py -------------------------------------------------------------------------------- /dmb/modeling/stereo/layers/dilated_hourglass.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.layers.basic_layers import conv3d_bn, conv3d_bn_relu, conv_bn_relu, deconv3d_bn 6 | 7 | 8 | class Hourglass(nn.Module): 9 | def __init__(self, in_planes, batchNorm=True): 10 | super(Hourglass, self).__init__() 11 | self.batchNorm = batchNorm 12 | 13 | self.conv1 = conv3d_bn_relu( 14 | self.batchNorm, in_planes, in_planes * 2, 15 | kernel_size=3, stride=2, padding=1, bias=False 16 | ) 17 | 18 | self.conv2 = conv3d_bn( 19 | self.batchNorm, in_planes * 2, in_planes * 2, 20 | kernel_size=3, stride=1, padding=1, bias=False 21 | ) 22 | 23 | self.conv3 = conv3d_bn_relu( 24 | self.batchNorm, in_planes * 2, in_planes * 2, 25 | kernel_size=3, stride=2, padding=1, bias=False 26 | ) 27 | self.conv4 = conv3d_bn_relu( 28 | self.batchNorm, in_planes * 2, in_planes * 2, 29 | kernel_size=3, stride=1, padding=1, bias=False 30 | ) 31 | self.conv5 = deconv3d_bn( 32 | self.batchNorm, in_planes * 2, in_planes * 2, 33 | kernel_size=3, padding=1, output_padding=1, stride=2, bias=False 34 | ) 35 | self.conv6 = deconv3d_bn( 36 | self.batchNorm, in_planes * 2, in_planes, 37 | kernel_size=3, padding=1, output_padding=1, stride=2, bias=False 38 | ) 39 | 40 | def forward(self, x, presqu, postsqu): 41 | # in:1/4, out:1/8 42 | out = self.conv1(x) 43 | # in:1/8, out:1/8 44 | pre = self.conv2(out) 45 | if postsqu is not None: 46 | pre = F.relu(pre + postsqu, inplace=True) 47 | else: 48 | pre = F.relu(pre, inplace=True) 49 | 50 | # in:1/8, out:1/16 51 | out = self.conv3(pre) 52 | # in:1/16, out:1/16 53 | out = self.conv4(out) 54 | 55 | # in:1/16, out:1/8 56 | if presqu is not None: 57 | post = F.relu(self.conv5(out) + presqu, inplace=True) 58 | else: 59 | post = F.relu(self.conv5(out) + pre, inplace=True) 60 | 61 | # in:1/8, out:1/4 62 | out = self.conv6(post) 63 | 64 | return out, pre, post 65 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/layers/inverse_warp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def inverse_warp(img, disp, padding_mode='zeros'): 5 | """ 6 | Args: 7 | img (Tensor): the source image (where to sample pixels) -- [B, C, H, W] 8 | disp (Tensor): disparity map of the target image -- [B, 1, H, W] 9 | padding_mode (str): padding mode, default is zero padding 10 | Returns: 11 | projected_img (Tensor): source image warped to the target image -- [B, C, H, W] 12 | """ 13 | b, _, h, w = disp.size() 14 | 15 | # [1, H, W] copy 0-height for 
w times : y coord 16 | i_range = torch.arange(0, h).view(1, h, 1).expand(1, h, w).float() 17 | # [1, H, W] copy 0-width for h times : x coord 18 | j_range = torch.arange(0, w).view(1, 1, w).expand(1, h, w).float() 19 | 20 | pixel_coords = torch.stack((j_range, i_range), dim=1).float().to(disp.device) # [1, 2, H, W] 21 | batch_pixel_coords = pixel_coords.expand(b, 2, h, w).contiguous().view(b, 2, -1) # [B, 2, H*W] 22 | 23 | X = batch_pixel_coords[:, 0, :] + disp.contiguous().view(b, -1) # [B, H*W] 24 | Y = batch_pixel_coords[:, 1, :] 25 | 26 | X_norm = 2 * X / (w - 1) - 1 27 | Y_norm = 2 * Y / (h - 1) - 1 28 | 29 | # If grid has values outside the range of [-1, 1], the corresponding outputs are handled as defined by padding_mode. 30 | # For details, please refer to torch.nn.functional.grid_sample 31 | if padding_mode == 'zeros': 32 | X_mask = ((X_norm > 1) | (X_norm < -1)).detach() 33 | X_norm[X_mask] = 2 34 | Y_mask = ((Y_norm > 1) | (Y_norm < -1)).detach() 35 | Y_norm[Y_mask] = 2 36 | 37 | pixel_coords = torch.stack([X_norm, Y_norm], dim=2) # [B, H*W, 2] 38 | pixel_coords = pixel_coords.view(b, h, w, 2) # [B, H, W, 2] 39 | 40 | projected_img = torch.nn.functional.grid_sample(img, pixel_coords, padding_mode=padding_mode, align_corners=True) # align_corners=True matches the (w - 1) / (h - 1) normalization above 41 | 42 | return projected_img 43 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/layers/inverse_warp_3d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | def inverse_warp_3d(img, disp, padding_mode='zeros', disp_Y=None): 5 | """ 6 | Args: 7 | img (Tensor): the source image (where to sample pixels) -- [B, C, H, W] or [B, C, D, H, W] 8 | disp (Tensor): disparity map of the target image -- [B, D, H, W] 9 | padding_mode (str): padding mode, default is zero padding 10 | disp_Y (Tensor): disparity map of the target image along Y-axis -- [B, D, H, W] 11 | Returns: 12 | projected_img (Tensor): source image warped to the target image -- [B, C, D, H, W] 13 | """ 14 | 15 | device = disp.device 16 | B, D, H, W = disp.shape 17 | C = img.shape[1] 18 | 19 | if disp_Y is not None: 20 | assert disp.shape == disp_Y.shape, 'disparity maps along x and y axis should have the same shape!' 21 | if img.dim() == 4: 22 | img = img.unsqueeze(2).expand(B, C, D, H, W) 23 | elif img.dim() == 5: 24 | assert D == img.shape[2], 'The disparity number should be the same between image and disparity map!'
25 | else: 26 | raise ValueError('image is only allowed with 4 or 5 dimensions, ' 27 | 'but got {} dimensions!'.format(img.dim())) 28 | 29 | # get mesh grid for each dimension 30 | grid_d = torch.linspace(0, D - 1, D).view(1, D, 1, 1).expand(B, D, H, W).to(device) 31 | grid_h = torch.linspace(0, H - 1, H).view(1, 1, H, 1).expand(B, D, H, W).to(device) 32 | grid_w = torch.linspace(0, W - 1, W).view(1, 1, 1, W).expand(B, D, H, W).to(device) 33 | 34 | # shift the index of W dimension with disparity 35 | grid_w = grid_w + disp 36 | if disp_Y is not None: 37 | grid_h = grid_h + disp_Y 38 | 39 | # normalize the grid value into [-1, 1]; (0, D-1), (0, H-1), (0, W-1) 40 | grid_d = (grid_d / (D - 1) * 2) - 1 41 | grid_h = (grid_h / (H - 1) * 2) - 1 42 | grid_w = (grid_w / (W - 1) * 2) - 1 43 | 44 | # concatenate the grid_* to [B, D, H, W, 3] 45 | grid_d = grid_d.unsqueeze(4) 46 | grid_h = grid_h.unsqueeze(4) 47 | grid_w = grid_w.unsqueeze(4) 48 | grid = torch.cat((grid_w, grid_h, grid_d), 4) 49 | 50 | projected_img = F.grid_sample(img, grid, padding_mode=padding_mode, align_corners=True) # align_corners=True matches the (D-1)/(H-1)/(W-1) normalization above 51 | 52 | return projected_img 53 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import make_gsm_loss_evaluator 2 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/losses/conf_nll_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class ConfidenceNllLoss(object): 7 | """ 8 | Args: 9 | weights (list of float or None): weight for each scale of estCost. 10 | start_disp (int): the start searching disparity index, usually 0 11 | max_disp (int): the max of Disparity. default: 192 12 | sparse (bool): whether the ground-truth disparity is sparse, 13 | for example, KITTI is sparse, but SceneFlow is not. default is False 14 | Inputs: 15 | estConf (Tensor or list of Tensor): the estimated confidence map, 16 | in [BatchSize, 1, Height, Width] layout. 17 | gtDisp (Tensor): the ground truth disparity map, 18 | in [BatchSize, 1, Height, Width] layout.
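Note: the per-level loss is -logsigmoid(estConf) averaged over the pixels whose ground-truth disparity lies in (start_disp, max_disp / scale); when resolutions differ, gtDisp is first rescaled and pooled to the size of estConf.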
19 | Outputs: 20 | weighted_loss_all_level (dict of Tensors): the weighted loss of all levels 21 | """ 22 | 23 | def __init__(self, max_disp, start_disp=0, weights=None, sparse=False): 24 | self.max_disp = max_disp 25 | self.start_disp = start_disp 26 | self.weights = weights 27 | self.sparse = sparse 28 | if sparse: 29 | # sparse disparity ==> max_pooling 30 | self.scale_func = F.adaptive_max_pool2d 31 | else: 32 | # dense disparity ==> avg_pooling 33 | self.scale_func = F.adaptive_avg_pool2d 34 | 35 | def loss_per_level(self, estConf, gtDisp): 36 | N, C, H, W = estConf.shape 37 | scaled_gtDisp = gtDisp 38 | scale = 1.0 39 | if gtDisp.shape[-2] != H or gtDisp.shape[-1] != W: 40 | # compute scale per level and scale gtDisp 41 | scale = gtDisp.shape[-1] / (W * 1.0) 42 | scaled_gtDisp = gtDisp / scale 43 | scaled_gtDisp = self.scale_func(scaled_gtDisp, (H, W)) 44 | 45 | # mask for valid disparity 46 | # greater than start disparity and less than max disparity / scale 47 | mask = (scaled_gtDisp > self.start_disp) & (scaled_gtDisp < (self.max_disp / scale)) 48 | mask = mask.detach_().type_as(gtDisp) 49 | 50 | # NLL loss 51 | valid_pixel_number = mask.float().sum() 52 | if valid_pixel_number < 1.0: 53 | valid_pixel_number = 1.0 54 | loss = (-1.0 * F.logsigmoid(estConf) * mask).sum() / valid_pixel_number 55 | 56 | return loss 57 | 58 | def __call__(self, estConf, gtDisp): 59 | if not isinstance(estConf, (list, tuple)): 60 | estConf = [estConf] 61 | 62 | if self.weights is None: 63 | self.weights = [1.0] * len(estConf) 64 | 65 | # compute loss for each level 66 | loss_all_level = [ 67 | self.loss_per_level(est_conf_per_lvl, gtDisp) 68 | for est_conf_per_lvl in estConf 69 | ] 70 | 71 | # re-weight loss per level 72 | weighted_loss_all_level = dict() 73 | for i, loss_per_level in enumerate(loss_all_level): 74 | name = "conf_loss_lvl{}".format(i) 75 | weighted_loss_all_level[name] = self.weights[i] * loss_per_level 76 | 77 | return weighted_loss_all_level 78 | 79 | def __repr__(self): 80 | repr_str = '{}\n'.format(self.__class__.__name__) 81 | repr_str += ' ' * 4 + 'Max Disparity: {}\n'.format(self.max_disp) 82 | repr_str += ' ' * 4 + 'Loss weight: {}\n'.format(self.weights) 83 | repr_str += ' ' * 4 + 'Disparity is sparse: {}\n'.format(self.sparse) 84 | 85 | return repr_str 86 | 87 | @property 88 | def name(self): 89 | return 'ConfidenceNLLLoss' 90 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/losses/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class DispSmoothL1Loss(object): 7 | """ 8 | Args: 9 | max_disp (int): the max of Disparity. default is 192 10 | start_disp (int): the start searching disparity index, usually 0 11 | weights (list of float or None): weight for each scale of estCost. 12 | sparse (bool): whether the ground-truth disparity is sparse, 13 | for example, KITTI is sparse, but SceneFlow is not, default is False. 14 | Inputs: 15 | estDisp (Tensor or list of Tensor): the estimated disparity map, 16 | in [BatchSize, 1, Height, Width] layout. 17 | gtDisp (Tensor): the ground truth disparity map, 18 | in [BatchSize, 1, Height, Width] layout.
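Note: when estDisp and gtDisp resolutions differ, gtDisp is divided by the scale factor and pooled (max-pooling for sparse ground truth, average-pooling for dense) to the estimated size; the smooth L1 loss is then computed only over pixels with ground-truth disparity in (start_disp, max_disp / scale).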
19 | Outputs: 20 | loss (dict), the loss of each level 21 | """ 22 | 23 | def __init__(self, max_disp, start_disp=0, weights=None, sparse=False): 24 | self.max_disp = max_disp 25 | self.weights = weights 26 | self.start_disp = start_disp 27 | self.sparse = sparse 28 | if sparse: 29 | # sparse disparity ==> max_pooling 30 | self.scale_func = F.adaptive_max_pool2d 31 | else: 32 | # dense disparity ==> avg_pooling 33 | self.scale_func = F.adaptive_avg_pool2d 34 | 35 | def loss_per_level(self, estDisp, gtDisp): 36 | N, C, H, W = estDisp.shape 37 | scaled_gtDisp = gtDisp 38 | scale = 1.0 39 | if gtDisp.shape[-2] != H or gtDisp.shape[-1] != W: 40 | # compute scale per level and scale gtDisp 41 | scale = gtDisp.shape[-1] / (W * 1.0) 42 | scaled_gtDisp = gtDisp / scale 43 | scaled_gtDisp = self.scale_func(scaled_gtDisp, (H, W)) 44 | 45 | # mask for valid disparity 46 | # (start disparity, max disparity / scale) 47 | # Attention: the invalid disparity of KITTI is set as 0, be sure to mask it out 48 | mask = (scaled_gtDisp > self.start_disp) & (scaled_gtDisp < (self.max_disp / scale)) 49 | if mask.sum() < 1.0: 50 | print('SmoothL1 loss: no point has disparity in ({}, {})!'.format(self.start_disp, 51 | self.max_disp / scale)) 52 | loss = (torch.abs(estDisp - scaled_gtDisp) * mask.float()).mean() 53 | return loss 54 | 55 | # smooth l1 loss 56 | loss = F.smooth_l1_loss(estDisp[mask], scaled_gtDisp[mask], reduction='mean') 57 | 58 | return loss 59 | 60 | def __call__(self, estDisp, gtDisp): 61 | if not isinstance(estDisp, (list, tuple)): 62 | estDisp = [estDisp] 63 | 64 | if self.weights is None: 65 | self.weights = [1.0] * len(estDisp) 66 | 67 | # compute loss for each level 68 | loss_all_level = [] 69 | for est_disp_per_lvl in estDisp: 70 | loss_all_level.append( 71 | self.loss_per_level(est_disp_per_lvl, gtDisp) 72 | ) 73 | 74 | # re-weight loss per level 75 | weighted_loss_all_level = dict() 76 | for i, loss_per_level in enumerate(loss_all_level): 77 | name = "l1_loss_lvl{}".format(i) 78 | weighted_loss_all_level[name] = self.weights[i] * loss_per_level 79 | 80 | return weighted_loss_all_level 81 | 82 | def __repr__(self): 83 | repr_str = '{}\n'.format(self.__class__.__name__) 84 | repr_str += ' ' * 4 + 'Max Disparity: {}\n'.format(self.max_disp) 85 | repr_str += ' ' * 4 + 'Start disparity: {}\n'.format(self.start_disp) 86 | repr_str += ' ' * 4 + 'Loss weight: {}\n'.format(self.weights) 87 | repr_str += ' ' * 4 + 'Disparity is sparse: {}\n'.format(self.sparse) 88 | 89 | return repr_str 90 | 91 | @property 92 | def name(self): 93 | return 'SmoothL1Loss' 94 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/losses/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .disp2prob import ( 2 | LaplaceDisp2Prob, GaussianDisp2Prob, OneHotDisp2Prob 3 | ) 4 | from .ssim import SSIM 5 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/losses/utils/quantile_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | def quantile_loss(minEstDisp, maxEstDisp, gtDisp, max_disp, start_disp=0, weight=1.0, theta=0.05): 6 | """ 7 | An implementation of the quantile loss proposed in DeepPruner 8 | For details, refer to: https://github.com/uber-research/DeepPruner/blob/master/deeppruner/loss_evaluation.py 9 | 10 | Inputs: 11 | minEstDisp,
(Tensor): the estimated min disparity map, i.e. the lower bound of disparity samples, 12 | in [BatchSize, 1, Height, Width] layout. 13 | maxEstDisp, (Tensor): the estimated max disparity map, i.e. the upper bound of disparity samples 14 | in [BatchSize, 1, Height, Width] layout. 15 | gtDisp, (Tensor): the ground truth disparity map, 16 | in [BatchSize, 1, Height, Width] layout. 17 | max_disp (int): the max of Disparity. default is 192 18 | start_disp (int): the start searching disparity index, usually 0 19 | weight (int, float): the weight of quantile loss 20 | theta (float): the balancing scalar, 0 < theta < 0.05 21 | 22 | 23 | """ 24 | # get valid ground truth disparity 25 | mask = (gtDisp > start_disp) & (gtDisp < (start_disp + max_disp)) 26 | 27 | # forces min_disparity to be equal or slightly lower than the ground truth disparity 28 | min_mask = ((gtDisp[mask] - minEstDisp[mask]) < 0).float() 29 | # if x < 0, x * (theta - 1); if x > 0, x * theta (with theta = 0.05: -0.95 and 0.05) 30 | min_loss = (gtDisp[mask] - minEstDisp[mask]) * (theta - min_mask) 31 | min_loss = min_loss.mean() 32 | 33 | # forces max_disparity to be equal or slightly larger than the ground truth disparity 34 | max_mask = ((gtDisp[mask] - maxEstDisp[mask]) < 0).float() 35 | # if x < 0, x * (-theta); if x > 0, x * (1 - theta) (with theta = 0.05: -0.05 and 0.95) 36 | max_loss = (gtDisp[mask] - maxEstDisp[mask]) * ((1 - theta) - max_mask) 37 | max_loss = max_loss.mean() 38 | 39 | total_loss = (min_loss + max_loss) * weight 40 | 41 | return total_loss -------------------------------------------------------------------------------- /dmb/modeling/stereo/losses/utils/ssim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def SSIM(x, y, mask=None, C1=0.01 ** 2, C2=0.03 ** 2): 6 | """ 7 | Calculate the SSIM between two given tensors.
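SSIM(x, y) = ((2 * mu_x * mu_y + C1) * (2 * sigma_xy + C2)) / ((mu_x^2 + mu_y^2 + C1) * (sigma_x^2 + sigma_y^2 + C2)), where the local statistics are computed with 3x3 average pooling.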
8 | For details, please refer to https://en.wikipedia.org/wiki/Structural_similarity 9 | Args: 10 | x (torch.Tensor): in [BatchSize, Channels, Height, Width] layout 11 | y (torch.Tensor): in [BatchSize, Channels, Height, Width] layout 12 | mask (None or torch.Tensor): the mask of valid index, in [BatchSize, Channels, Height, Width] layout 13 | C1 (double or int): a variable to stabilize the division with weak denominator 14 | C2 (double or int): a variable to stabilize the division with weak denominator 15 | Outputs: 16 | (double): the average difference between x and y, with values in [0, 1] 17 | """ 18 | 19 | mu_x = F.avg_pool2d(x, 3, 1, 1) 20 | mu_y = F.avg_pool2d(y, 3, 1, 1) 21 | mu_x_mu_y = mu_x * mu_y 22 | mu_x_sq = mu_x.pow(2) 23 | mu_y_sq = mu_y.pow(2) 24 | 25 | sigma_x = F.avg_pool2d(x ** 2, 3, 1, 1) - mu_x_sq 26 | sigma_y = F.avg_pool2d(y ** 2, 3, 1, 1) - mu_y_sq 27 | sigma_xy = F.avg_pool2d(x * y, 3, 1, 1) - mu_x_mu_y 28 | 29 | SSIM_n = (2 * mu_x_mu_y + C1) * (2 * sigma_xy + C2) 30 | SSIM_d = (mu_x_sq + mu_y_sq + C1) * (sigma_x + sigma_y + C2) 31 | SSIM = SSIM_n / SSIM_d 32 | 33 | if mask is not None: 34 | SSIM = SSIM[mask] 35 | 36 | # Here, we calculate the difference between x and y, and limit its value in [0,1] 37 | return torch.clamp((1 - SSIM) / 2, 0, 1).mean() 38 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .general_stereo_model import GeneralizedStereoModel 2 | from .DeepPruner import DeepPruner 3 | from .AnyNet import AnyNet 4 | 5 | _META_ARCHITECTURES = { 6 | "GeneralizedStereoModel": GeneralizedStereoModel, 7 | "DeepPruner": DeepPruner, 8 | "AnyNet": AnyNet, 9 | } 10 | 11 | 12 | def build_stereo_model(cfg): 13 | meta_arch = _META_ARCHITECTURES[cfg.model.meta_architecture] 14 | return meta_arch(cfg) 15 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/models/general_stereo_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from dmb.modeling.stereo.backbones import build_backbone 6 | from dmb.modeling.stereo.disp_samplers import build_disp_sampler 7 | from dmb.modeling.stereo.cost_processors import build_cost_processor 8 | from dmb.modeling.stereo.cmn import build_cmn 9 | from dmb.modeling.stereo.disp_predictors import build_disp_predictor 10 | from dmb.modeling.stereo.disp_refinement import build_disp_refinement 11 | from dmb.modeling.stereo.losses import make_gsm_loss_evaluator 12 | 13 | 14 | class GeneralizedStereoModel(nn.Module): 15 | """ 16 | A general stereo matching model which fits most methods.
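The forward pass chains a feature backbone, a cost processor (cost volume construction and aggregation), a disparity predictor and, if configured, a confidence measurement network (cmn) and a disparity refinement module; in training mode it returns a loss dict instead of disparity maps.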
17 | 18 | """ 19 | def __init__(self, cfg): 20 | super(GeneralizedStereoModel, self).__init__() 21 | self.cfg = cfg.copy() 22 | self.max_disp = cfg.model.max_disp 23 | 24 | self.backbone = build_backbone(cfg) 25 | 26 | self.cost_processor = build_cost_processor(cfg) 27 | 28 | # confidence measurement network 29 | self.cmn = None 30 | if 'cmn' in cfg.model: 31 | self.cmn = build_cmn(cfg) 32 | 33 | self.disp_predictor = build_disp_predictor(cfg) 34 | 35 | self.disp_refinement = None 36 | if 'disp_refinement' in cfg.model: 37 | self.disp_refinement = build_disp_refinement(cfg) 38 | 39 | # make general stereo matching loss evaluator 40 | self.loss_evaluator = make_gsm_loss_evaluator(cfg) 41 | 42 | def forward(self, batch): 43 | # parse batch 44 | ref_img, tgt_img = batch['leftImage'], batch['rightImage'] 45 | target = batch['leftDisp'] if 'leftDisp' in batch else None 46 | 47 | # extract image feature 48 | ref_fms, tgt_fms = self.backbone(ref_img, tgt_img) 49 | 50 | # compute cost volume 51 | costs = self.cost_processor(ref_fms, tgt_fms) 52 | 53 | # disparity prediction 54 | disps = [self.disp_predictor(cost) for cost in costs] 55 | 56 | # disparity refinement 57 | if self.disp_refinement is not None: 58 | disps = self.disp_refinement(disps, ref_fms, tgt_fms, ref_img, tgt_img) 59 | 60 | if self.training: 61 | loss_dict = dict() 62 | variance = None 63 | if hasattr(self.cfg.model.losses, 'focal_loss'): 64 | variance = self.cfg.model.losses.focal_loss.get('variance', None) 65 | 66 | if self.cmn is not None: 67 | # confidence measurement network 68 | variance, cm_losses = self.cmn(costs, target) 69 | loss_dict.update(cm_losses) 70 | 71 | loss_args = dict( 72 | variance = variance, 73 | ) 74 | 75 | gsm_loss_dict = self.loss_evaluator(disps, costs, target, **loss_args) 76 | loss_dict.update(gsm_loss_dict) 77 | 78 | return {}, loss_dict 79 | 80 | else: 81 | 82 | results = dict( 83 | disps=disps, 84 | costs=costs, 85 | ) 86 | 87 | if self.cmn is not None: 88 | # confidence measurement network 89 | variance, confs = self.cmn(costs, target) 90 | results.update(confs=confs) 91 | 92 | return results, {} 93 | -------------------------------------------------------------------------------- /dmb/modeling/stereo/registry.py: -------------------------------------------------------------------------------- 1 | from dmb.utils import Registry 2 | 3 | BACKBONES = Registry() 4 | -------------------------------------------------------------------------------- /dmb/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .spn import GateRecurrent2dnoind 2 | from detectron2.layers import DeformConv, ModulatedDeformConv 3 | -------------------------------------------------------------------------------- /dmb/ops/spn/__init__.py: -------------------------------------------------------------------------------- 1 | from .modules import GateRecurrent2dnoind -------------------------------------------------------------------------------- /dmb/ops/spn/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/dmb/ops/spn/functions/__init__.py -------------------------------------------------------------------------------- /dmb/ops/spn/functions/gaterecurrent2dnoind.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | try: 
4 | from ..build.lib import gaterecurrent2dnoind_cuda as gaterecurrent2d 5 | except ImportError: 6 | import gaterecurrent2dnoind_cuda as gaterecurrent2d 7 | 8 | class GateRecurrent2dnoindFunction(Function): 9 | 10 | @staticmethod 11 | def forward(ctx, X, G1, G2, G3, horizontal, reverse): 12 | num, channels, height, width = X.size() 13 | output = torch.zeros(num, channels, height, width, device=X.device) 14 | 15 | if not X.is_cuda: 16 | raise NotImplementedError("cpu version is not ready at this time") 17 | 18 | else: 19 | gaterecurrent2d.forward(horizontal, reverse, X, G1, G2, G3, output) 20 | ctx.save_for_backward(X, G1, G2, G3, output) 21 | ctx.hiddensize = X.size() 22 | ctx.horizontal = horizontal 23 | ctx.reverse = reverse 24 | return output 25 | 26 | @staticmethod 27 | def backward(ctx, grad_output): 28 | assert ctx.hiddensize is not None and grad_output.is_cuda 29 | num, channels, height, width = ctx.hiddensize 30 | X, G1, G2, G3, output = ctx.saved_tensors 31 | 32 | grad_X = torch.zeros(num, channels, height, width, device=X.device) 33 | grad_G1 = torch.zeros(num, channels, height, width, device=X.device) 34 | grad_G2 = torch.zeros(num, channels, height, width, device=X.device) 35 | grad_G3 = torch.zeros(num, channels, height, width, device=X.device) 36 | 37 | gaterecurrent2d.backward(ctx.horizontal, ctx.reverse, output, grad_output, X, G1, G2, G3, grad_X, grad_G1, grad_G2, grad_G3) 38 | 39 | return (grad_X, grad_G1, grad_G2, grad_G3) + (None,) * 2 40 | -------------------------------------------------------------------------------- /dmb/ops/spn/gaterecurrent2dnoind_cuda.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: gaterecurrent2dnoind-cuda 3 | Version: 0.0.0 4 | Summary: UNKNOWN 5 | Home-page: UNKNOWN 6 | Author: UNKNOWN 7 | Author-email: UNKNOWN 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 | -------------------------------------------------------------------------------- /dmb/ops/spn/gaterecurrent2dnoind_cuda.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | setup.py 2 | gaterecurrent2dnoind_cuda.egg-info/PKG-INFO 3 | gaterecurrent2dnoind_cuda.egg-info/SOURCES.txt 4 | gaterecurrent2dnoind_cuda.egg-info/dependency_links.txt 5 | gaterecurrent2dnoind_cuda.egg-info/top_level.txt 6 | src/gaterecurrent2dnoind_cuda.cpp 7 | src/gaterecurrent2dnoind_kernel.cu -------------------------------------------------------------------------------- /dmb/ops/spn/gaterecurrent2dnoind_cuda.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dmb/ops/spn/gaterecurrent2dnoind_cuda.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | gaterecurrent2dnoind_cuda 2 | -------------------------------------------------------------------------------- /dmb/ops/spn/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .gaterecurrent2dnoind import GateRecurrent2dnoind 2 | -------------------------------------------------------------------------------- /dmb/ops/spn/modules/gaterecurrent2dnoind.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from ..functions.gaterecurrent2dnoind import GateRecurrent2dnoindFunction 3 | 4 | class 
GateRecurrent2dnoind(nn.Module): 5 | """Gated recurrent 2D propagation (SPN) along one direction, without gate indices.""" 6 | def __init__(self, horizontal_, reverse_): 7 | super(GateRecurrent2dnoind, self).__init__() 8 | self.horizontal = horizontal_ 9 | self.reverse = reverse_ 10 | 11 | def forward(self, X, G1, G2, G3): 12 | return GateRecurrent2dnoindFunction.apply(X, G1, G2, G3, self.horizontal, self.reverse) 13 | -------------------------------------------------------------------------------- /dmb/ops/spn/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='gaterecurrent2dnoind_cuda', 6 | ext_modules=[ 7 | CUDAExtension('gaterecurrent2dnoind_cuda', [ 8 | 'src/gaterecurrent2dnoind_cuda.cpp', 9 | 'src/gaterecurrent2dnoind_kernel.cu', 10 | ]) 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) 15 | 16 | 17 | -------------------------------------------------------------------------------- /dmb/ops/spn/src/gaterecurrent2dnoind_cuda.cpp: -------------------------------------------------------------------------------- 1 | // gaterecurrent2dnoind_cuda.cpp 2 | //#include <THC/THC.h> 3 | #include <torch/extension.h> 4 | #include <vector> 5 | #include "gaterecurrent2dnoind_kernel.h" 6 | 7 | int gaterecurrent2dnoind_forward_cuda(int horizontal_, int reverse_, torch::Tensor X, torch::Tensor G1, torch::Tensor G2, torch::Tensor G3, torch::Tensor output) 8 | { 9 | // Grab flat float pointers into the input tensors 10 | float * X_data = X.data_ptr<float>(); 11 | float * G1_data = G1.data_ptr<float>(); 12 | float * G2_data = G2.data_ptr<float>(); 13 | float * G3_data = G3.data_ptr<float>(); 14 | float * H_data = output.data_ptr<float>(); 15 | 16 | // dimensions 17 | int num_ = X.size(0); 18 | int channels_ = X.size(1); 19 | int height_ = X.size(2); 20 | int width_ = X.size(3); 21 | 22 | 23 | if(horizontal_ && !reverse_) // left to right 24 | { 25 | //const int count = height_ * channels_ * num_; 26 | Forward_left_right(num_, channels_, height_, width_, X_data, G1_data, G2_data, G3_data, H_data, horizontal_, reverse_); 27 | } 28 | else if(horizontal_ && reverse_) // right to left 29 | { 30 | Forward_right_left(num_, channels_, height_, width_, X_data, G1_data, G2_data, G3_data, H_data, horizontal_, reverse_); 31 | } 32 | else if(!horizontal_ && !reverse_) // top to bottom 33 | { 34 | Forward_top_bottom(num_, channels_, height_, width_, X_data, G1_data, G2_data, G3_data, H_data, horizontal_, reverse_); 35 | } 36 | else 37 | { 38 | Forward_bottom_top(num_, channels_, height_, width_, X_data, G1_data, G2_data, G3_data, H_data, horizontal_, reverse_); 39 | } 40 | 41 | return 1; 42 | } 43 | 44 | int gaterecurrent2dnoind_backward_cuda(int horizontal_, int reverse_, torch::Tensor top, torch::Tensor top_grad, torch::Tensor X, torch::Tensor G1, torch::Tensor G2, torch::Tensor G3, torch::Tensor X_grad, torch::Tensor G1_grad, torch::Tensor G2_grad, torch::Tensor G3_grad) 45 | { 46 | // Grab flat float pointers into the input tensors 47 | float * X_data = X.data_ptr<float>(); 48 | float * G1_data = G1.data_ptr<float>(); 49 | float * G2_data = G2.data_ptr<float>(); 50 | float * G3_data = G3.data_ptr<float>(); 51 | float * H_data = top.data_ptr<float>(); 52 | 53 | float * H_diff = top_grad.data_ptr<float>(); 54 | 55 | float * X_diff = X_grad.data_ptr<float>(); 56 | float * G1_diff = G1_grad.data_ptr<float>(); 57 | float * G2_diff = G2_grad.data_ptr<float>(); 58 | float * G3_diff = G3_grad.data_ptr<float>(); 59 | 60 | // dimensions 61 | int num_ = X.size(0); 62 | int channels_ = X.size(1); 63 | int height_ = X.size(2); 64 | int width_ = X.size(3); 65 | 66 | 67 | if(horizontal_ && !
reverse_) // left to right 68 | { 69 | Backward_left_right(num_, channels_, height_, width_, X_data, G1_data, G2_data, G3_data, H_data, X_diff, G1_diff, G2_diff, G3_diff, H_diff, horizontal_, reverse_); 70 | } 71 | else if(horizontal_ && reverse_) // right to left 72 | { 73 | Backward_right_left(num_, channels_, height_, width_, X_data, G1_data, G2_data, G3_data, H_data, X_diff, G1_diff, G2_diff, G3_diff, H_diff, horizontal_, reverse_); 74 | } 75 | else if(!horizontal_ && !reverse_) // top to bottom 76 | { 77 | Backward_top_bottom(num_, channels_, height_, width_, X_data, G1_data, G2_data, G3_data, H_data, X_diff, G1_diff, G2_diff, G3_diff, H_diff, horizontal_, reverse_); 78 | } 79 | else { 80 | Backward_bottom_top(num_, channels_, height_, width_, X_data, G1_data, G2_data, G3_data, H_data, X_diff, G1_diff, G2_diff, G3_diff, H_diff, horizontal_, reverse_); 81 | } 82 | 83 | return 1; 84 | } 85 | 86 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 87 | m.def("forward", &gaterecurrent2dnoind_forward_cuda, "GateRecurrent2dnoind forward (CUDA)"); 88 | m.def("backward", &gaterecurrent2dnoind_backward_cuda, "GateRecurrent2dnoind backward (CUDA)"); 89 | } -------------------------------------------------------------------------------- /dmb/ops/spn/src/gaterecurrent2dnoind_cuda.h: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | int gaterecurrent2dnoind_forward_cuda(int horizontal_, int reverse_, torch::Tensor X, torch::Tensor G1, torch::Tensor G2, torch::Tensor G3, torch::Tensor output); 4 | int gaterecurrent2dnoind_backward_cuda(int horizontal_, int reverse_, torch::Tensor top, torch::Tensor top_grad, torch::Tensor X, torch::Tensor G1, torch::Tensor G2, torch::Tensor G3, torch::Tensor X_diff, torch::Tensor G1_diff, torch::Tensor G2_diff, torch::Tensor G3_diff); 5 | -------------------------------------------------------------------------------- /dmb/ops/spn/src/gaterecurrent2dnoind_kernel.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | void Forward_left_right(int num_, int channels_, int height_, int width_, float * X, float * G1, float * G2, float * G3, float * H, int horizontal_, int reverse_); 4 | void Forward_right_left(int num_, int channels_, int height_, int width_, float * X, float * G1, float * G2, float * G3, float * H, int horizontal_, int reverse_); 5 | void Forward_top_bottom(int num_, int channels_, int height_, int width_, float * X, float * G1, float * G2, float * G3, float * H, int horizontal_, int reverse_); 6 | void Forward_bottom_top(int num_, int channels_, int height_, int width_, float * X, float * G1, float * G2, float * G3, float * H, int horizontal_, int reverse_); 7 | void Backward_left_right(int num_, int channels_, int height_, int width_, float * X, float * G1, float * G2, float * G3, float * H, float * X_diff, float * G1_diff, float * G2_diff, float * G3_diff, float * H_diff, int horizontal_, int reverse_); 8 | void Backward_right_left(int num_, int channels_, int height_, int width_, float * X, float * G1, float * G2, float * G3, float * H, float * X_diff, float * G1_diff, float * G2_diff, float * G3_diff, float * H_diff, int horizontal_, int reverse_); 9 | void Backward_top_bottom(int num_, int channels_, int height_, int width_, float * X, float * G1, float * G2, float * G3, float * H, float * X_diff, float * G1_diff, float * G2_diff, float * G3_diff, float * H_diff, int horizontal_, int reverse_); 10 | void Backward_bottom_top(int num_, int channels_, int height_, int width_, float * X, float * 
G1, float * G2, float * G3, float * H, float * X_diff, float * G1_diff, float * G2_diff, float * G3_diff, float * H_diff, int horizontal_, int reverse_); 11 | 12 | 13 | -------------------------------------------------------------------------------- /dmb/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist_utils import all_reduce_grads, DistOptimizerHook, DistApexOptimizerHook 2 | from .collect_env import collect_env_info 3 | from .env import init_dist, set_random_seed, get_root_logger 4 | from .tensorboard_logger import TensorboardLoggerHook 5 | from .text_logger import TextLoggerHook 6 | from .registry import Registry 7 | -------------------------------------------------------------------------------- /dmb/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | import PIL 2 | 3 | from torch.utils.collect_env import get_pretty_env_info 4 | 5 | 6 | def get_pil_version(): 7 | return "\n Pillow ({})".format(PIL.__version__) 8 | 9 | 10 | def collect_env_info(): 11 | env_str = get_pretty_env_info() 12 | env_str += get_pil_version() 13 | return env_str 14 | -------------------------------------------------------------------------------- /dmb/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.distributed as dist 4 | from torch._utils import ( 5 | _flatten_dense_tensors, _unflatten_dense_tensors, _take_tensors 6 | ) 7 | from mmcv.runner import OptimizerHook 8 | 9 | try: 10 | from apex import amp 11 | import apex 12 | except ImportError: 13 | raise ImportError('Use APEX for multi-precision via apex.amp') 14 | 15 | 16 | def _all_reduce_coalesced(tensors, world_size, bucket_size_mb=-1): 17 | if bucket_size_mb > 0: 18 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 19 | buckets = _take_tensors(tensors, bucket_size_bytes) 20 | else: 21 | buckets = OrderedDict() 22 | for tensor in tensors: 23 | tp = tensor.type() 24 | if tp not in buckets: 25 | buckets[tp] = [] 26 | buckets[tp].append(tensor) 27 | buckets = buckets.values() 28 | 29 | for bucket in buckets: 30 | flat_tensors = _flatten_dense_tensors(bucket) 31 | dist.all_reduce(flat_tensors) 32 | flat_tensors.div_(world_size) 33 | for tensor, synced in zip( 34 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 35 | tensor.copy_(synced) 36 | 37 | 38 | def all_reduce_grads(model, coalesce=True, bucket_size_mb=-1): 39 | grads = [ 40 | param.grad.data for param in model.parameters() 41 | if param.requires_grad and param.grad is not None 42 | ] 43 | 44 | world_size = dist.get_world_size() 45 | if coalesce: 46 | _all_reduce_coalesced(grads, world_size, bucket_size_mb) 47 | else: 48 | for tensor in grads: 49 | dist.all_reduce(tensor.div_(world_size)) 50 | 51 | 52 | class DistOptimizerHook(OptimizerHook): 53 | 54 | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1): 55 | super(DistOptimizerHook, self).__init__(grad_clip) 56 | self.grad_clip = grad_clip 57 | self.coalesce = coalesce 58 | self.bucket_size_mb = bucket_size_mb 59 | 60 | def after_train_iter(self, runner): 61 | runner.optimizer.zero_grad() 62 | runner.outputs['loss'].backward() 63 | all_reduce_grads(runner.model, self.coalesce, self.bucket_size_mb) 64 | if self.grad_clip is not None: 65 | self.clip_grads(runner.model.parameters()) 66 | runner.optimizer.step() 67 | 68 | 69 | class DistApexOptimizerHook(OptimizerHook): 70 | 71 | def __init__(self, 
grad_clip=None, coalesce=True, bucket_size_mb=-1, use_apex=True): 72 | super(DistApexOptimizerHook, self).__init__(grad_clip) 73 | self.grad_clip = grad_clip 74 | self.coalesce = coalesce 75 | self.bucket_size_mb = bucket_size_mb 76 | self.use_apex = use_apex 77 | 78 | def after_train_iter(self, runner): 79 | runner.model.zero_grad() 80 | runner.optimizer.zero_grad() 81 | # Note: If mixed precision is not used, this ends up doing nothing 82 | # Otherwise apply loss scaling for mixed-precision recipe 83 | with amp.scale_loss(runner.outputs['loss'], runner.optimizer) as scaled_losses: 84 | scaled_losses.backward() 85 | all_reduce_grads(runner.model, self.coalesce, self.bucket_size_mb) 86 | if self.grad_clip is not None: 87 | self.clip_grads(runner.model.parameters()) 88 | runner.optimizer.step() 89 | -------------------------------------------------------------------------------- /dmb/utils/env.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | import random 5 | import subprocess 6 | 7 | import numpy as np 8 | 9 | import torch 10 | import torch.distributed as dist 11 | import torch.multiprocessing as mp 12 | 13 | from mmcv.runner import get_dist_info 14 | 15 | 16 | def _init_dist_pytorch(backend, **kwargs): 17 | # TODO: use local_rank instead of rank % num_gpus 18 | rank = int(os.environ['RANK']) 19 | num_gpus = torch.cuda.device_count() 20 | torch.cuda.set_device(rank % num_gpus) 21 | dist.init_process_group(backend=backend, **kwargs) 22 | 23 | 24 | def _init_dist_mpi(backend, **kwargs): 25 | raise NotImplementedError 26 | 27 | 28 | def _init_dist_slurm(backend, port=29500, **kwargs): 29 | proc_id = int(os.environ['SLURM_PROCID']) 30 | ntasks = int(os.environ['SLURM_NTASKS']) 31 | node_list = os.environ['SLURM_NODELIST'] 32 | num_gpus = torch.cuda.device_count() 33 | torch.cuda.set_device(proc_id % num_gpus) 34 | addr = subprocess.getoutput( 35 | 'scontrol show hostname {} | head -n1'.format(node_list)) 36 | os.environ['MASTER_PORT'] = str(port) 37 | os.environ['MASTER_ADDR'] = addr 38 | os.environ['WORLD_SIZE'] = str(ntasks) 39 | os.environ['RANK'] = str(proc_id) 40 | dist.init_process_group(backend=backend) 41 | 42 | 43 | def init_dist(launcher, backend='nccl', **kwargs): 44 | if mp.get_start_method(allow_none=True) is None: 45 | mp.set_start_method('spawn') 46 | if launcher == 'pytorch': 47 | _init_dist_pytorch(backend, **kwargs) 48 | elif launcher == 'mpi': 49 | _init_dist_mpi(backend, **kwargs) 50 | elif launcher == 'slurm': 51 | _init_dist_slurm(backend, **kwargs) 52 | else: 53 | raise ValueError('Invalid launcher type: {}'.format(launcher)) 54 | 55 | 56 | def set_random_seed(seed): 57 | random.seed(seed) 58 | np.random.seed(seed) 59 | torch.manual_seed(seed) 60 | torch.cuda.manual_seed_all(seed) 61 | 62 | 63 | def get_root_logger(save_dir, log_level=logging.INFO, filename="log.txt"): 64 | logger = logging.getLogger() 65 | if not logger.hasHandlers(): 66 | logging.basicConfig( 67 | format='%(asctime)s - %(levelname)s - %(message)s', 68 | level=log_level) 69 | rank, _ = get_dist_info() 70 | if rank != 0: 71 | logger.setLevel('ERROR') 72 | if save_dir: 73 | fh = logging.FileHandler(os.path.join(save_dir, filename)) 74 | fh.setLevel(log_level) 75 | formatter = logging.Formatter("%(asctime)s - %(levelname)s: %(message)s") 76 | fh.setFormatter(formatter) 77 | logger.addHandler(fh) 78 | if rank != 0: 79 | fh.setLevel('ERROR') 80 | 81 | return logger 82 | 
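A minimal usage sketch for the environment helpers above, assuming a training entry point similar to tools/train.py; the launcher, seed, and work_dir below are illustrative, not taken from the repository:

import logging

from dmb.utils import init_dist, set_random_seed, get_root_logger

def main():
    # seed python, numpy and torch (including all CUDA devices) up front
    set_random_seed(1)
    # join the process group, e.g. when launched via torch.distributed.launch
    # (expects RANK etc. to be set in the environment)
    init_dist('pytorch', backend='nccl')
    # rank 0 writes <save_dir>/log.txt at INFO level; other ranks only log errors
    logger = get_root_logger('./work_dir', log_level=logging.INFO)
    logger.info('distributed environment initialised')

if __name__ == '__main__':
    main()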
-------------------------------------------------------------------------------- /dmb/utils/fp16/__init__.py: -------------------------------------------------------------------------------- 1 | from .decorators import auto_fp16, force_fp32 2 | from .hooks import Fp16OptimizerHook, wrap_fp16_model 3 | 4 | __all__ = ['auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model'] 5 | -------------------------------------------------------------------------------- /dmb/utils/fp16/utils.py: -------------------------------------------------------------------------------- 1 | from collections import abc 2 | 3 | import numpy as np 4 | import torch 5 | 6 | 7 | def cast_tensor_type(inputs, src_type, dst_type): 8 | if isinstance(inputs, torch.Tensor): 9 | return inputs.to(dst_type) 10 | elif isinstance(inputs, str): 11 | return inputs 12 | elif isinstance(inputs, np.ndarray): 13 | return inputs 14 | elif isinstance(inputs, abc.Mapping): 15 | return type(inputs)({ 16 | k: cast_tensor_type(v, src_type, dst_type) 17 | for k, v in inputs.items() 18 | }) 19 | elif isinstance(inputs, abc.Iterable): 20 | return type(inputs)( 21 | cast_tensor_type(item, src_type, dst_type) for item in inputs) 22 | else: 23 | return inputs 24 | -------------------------------------------------------------------------------- /dmb/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copy from maskrcnn_benchmark 2 | # https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/utils/registry.py 3 | 4 | 5 | def _register_generic(module_dict, module_name, module): 6 | assert module_name not in module_dict 7 | module_dict[module_name] = module 8 | 9 | 10 | class Registry(dict): 11 | ''' 12 | A helper class for managing module registration. It extends a dictionary 13 | and provides a register function. 14 | E.g. creating a registry: 15 | some_registry = Registry({"default": default_module}) 16 | There are two ways of registering new modules: 17 | 1): call the register function directly: 18 | def foo(): 19 | ... 20 | some_registry.register("foo_module", foo) 21 | 2): use register as a decorator when declaring the module: 22 | @some_registry.register("foo_module") 23 | @some_registry.register("foo_module_nickname") 24 | def foo(): 25 | ... 26 | Modules are then accessed as in a dictionary, e.g.: 27 | f = some_registry["foo_module"] 28 | ''' 29 | 30 | def __init__(self, *args, **kwargs): 31 | super(Registry, self).__init__(*args, **kwargs) 32 | 33 | def register(self, module_name, module=None): 34 | # used as function call 35 | if module is not None: 36 | _register_generic(self, module_name, module) 37 | return 38 | 39 | # used as decorator 40 | def register_fn(fn): 41 | _register_generic(self, module_name, fn) 42 | return fn 43 | 44 | return register_fn 45 | -------------------------------------------------------------------------------- /dmb/utils/solver.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import torch 4 | from mmcv.runner import obj_from_dict 5 | 6 | def build_optimizer(model, optimizer_cfg): 7 | """Build optimizer from configs. 8 | Args: 9 | model (:obj:`nn.Module`): The model with parameters to be optimized. 10 | optimizer_cfg (dict): The config dict of the optimizer. 11 | Positional fields are: 12 | - type: class name of the optimizer. 13 | - lr: base learning rate. 14 | Optional fields are: 15 | - any arguments of the corresponding optimizer type, e.g., 16 | weight_decay, momentum, etc. 
17 | - paramwise_options: a dict with 3 accepted fields 18 | (bias_lr_mult, bias_decay_mult, norm_decay_mult). 19 | `bias_lr_mult` and `bias_decay_mult` will be multiplied to 20 | the lr and weight decay respectively for all bias parameters 21 | (except for the normalization layers), and 22 | `norm_decay_mult` will be multiplied to the weight decay 23 | for all weight and bias parameters of normalization layers. 24 | Returns: 25 | torch.optim.Optimizer: The initialized optimizer. 26 | """ 27 | if hasattr(model, 'module'): 28 | model = model.module 29 | 30 | optimizer_cfg = optimizer_cfg.copy() 31 | paramwise_options = optimizer_cfg.pop('paramwise_options', None) 32 | # if no paramwise option is specified, just use the global setting 33 | if paramwise_options is None: 34 | return obj_from_dict(optimizer_cfg, torch.optim, dict(params=model.parameters())) 35 | else: 36 | assert isinstance(paramwise_options, dict) 37 | # get base lr and weight decay 38 | base_lr = optimizer_cfg['lr'] 39 | base_wd = optimizer_cfg.get('weight_decay', None) 40 | # weight_decay must be explicitly specified if mult is specified 41 | if ('bias_decay_mult' in paramwise_options or 'norm_decay_mult' in paramwise_options): 42 | assert base_wd is not None 43 | 44 | # get param-wise options 45 | bias_lr_mult = paramwise_options.get('bias_lr_mult', 1.) 46 | bias_decay_mult = paramwise_options.get('bias_decay_mult', 1.) 47 | norm_decay_mult = paramwise_options.get('norm_decay_mult', 1.) 48 | # set param-wise lr and weight decay 49 | params = [] 50 | for name, param in model.named_parameters(): 51 | if not param.requires_grad: 52 | continue 53 | 54 | param_group = {'params': [param]} 55 | # for norm layers, overwrite the weight decay of weight and bias 56 | # TODO: obtain the norm layer prefixes dynamically 57 | if re.search(r'(bn|gn)(\d+)?\.(weight|bias)', name): 58 | if base_wd is not None: 59 | param_group['weight_decay'] = base_wd * norm_decay_mult 60 | # for other layers, overwrite both lr and weight decay of bias 61 | elif name.endswith('.bias'): 62 | param_group['lr'] = base_lr * bias_lr_mult 63 | if base_wd is not None: 64 | param_group['weight_decay'] = base_wd * bias_decay_mult 65 | # otherwise use the global settings 66 | 67 | params.append(param_group) 68 | 69 | optimizer_cls = getattr(torch.optim, optimizer_cfg.pop('type')) 70 | return optimizer_cls(params, **optimizer_cfg) 71 | -------------------------------------------------------------------------------- /dmb/utils/tensorboard_logger.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import numpy as np 3 | 4 | from mmcv.runner import LoggerHook, master_only 5 | 6 | 7 | class TensorboardLoggerHook(LoggerHook): 8 | """ 9 | Hook that logs training records to TensorBoard. 10 | 11 | Args: 12 | log_dir (str or Path): directory to save the TensorBoard event files. 
13 | interval (int): logging interval in iterations, default is 10. 14 | ignore_last (bool): whether to ignore the log records of the last 15 | iterations in an epoch if fewer than `interval` remain. 16 | reset_flag (bool): whether to clear the log buffer after logging. 17 | register_logWithIter_keyword (list or tuple): variables whose names 18 | contain one of these keywords are logged with the iteration, 19 | rather than the epoch, as global step. 20 | """ 21 | 22 | def __init__( 23 | self, 24 | log_dir=None, 25 | interval=10, 26 | ignore_last=True, 27 | reset_flag=True, 28 | register_logWithIter_keyword=None 29 | ): 30 | super(TensorboardLoggerHook, self).__init__(interval, ignore_last, 31 | reset_flag) 32 | self.log_dir = log_dir 33 | self.register_logWithIter_keyword = register_logWithIter_keyword 34 | 35 | @master_only 36 | def before_run(self, runner): 37 | try: 38 | from tensorboardX import SummaryWriter 39 | except ImportError: 40 | raise ImportError('Please install tensorflow and tensorboardX ' 41 | 'to use TensorboardLoggerHook.') 42 | else: 43 | if self.log_dir is None: 44 | self.log_dir = osp.join(runner.work_dir, 'tf_logs') 45 | self.writer = SummaryWriter(self.log_dir) 46 | 47 | @master_only 48 | def single_log(self, tag, record, global_step): 49 | # self-defined, in format: prefix/suffix_tag 50 | prefix = tag.split('/')[0] 51 | suffix_tag = '/'.join(tag.split('/')[1:]) 52 | if prefix == 'image': 53 | self.writer.add_image(suffix_tag, record, global_step) 54 | return 55 | if prefix == 'figure': 56 | self.writer.add_figure(suffix_tag, record, global_step) 57 | return 58 | if prefix == 'histogram': 59 | self.writer.add_histogram(suffix_tag, record, global_step) 60 | return 61 | if prefix == 'scalar': 62 | self.writer.add_scalar(suffix_tag, record, global_step) 63 | return 64 | 65 | if isinstance(record, str): 66 | self.writer.add_text(tag, record, global_step) 67 | return 68 | 69 | if record.size > 1: 70 | self.writer.add_image(tag, record, global_step) 71 | else: 72 | self.writer.add_scalar(tag, record, global_step) 73 | 74 | @master_only 75 | def log(self, runner): 76 | for var in runner.log_buffer.output: 77 | if var in ['time', 'data_time']: 78 | continue 79 | tag = var 80 | record = runner.log_buffer.output[var] 81 | 82 | global_step = runner.epoch 83 | 84 | # e.g., variables matching a keyword (such as loss) are logged per iteration 85 | if isinstance(self.register_logWithIter_keyword, (tuple, list)): 86 | for keyword in self.register_logWithIter_keyword: 87 | if var.find(keyword) > -1: 88 | global_step = runner.iter 89 | 90 | global_step = global_step + 1 91 | 92 | if isinstance(record, (list, tuple)): 93 | for idx, rec in enumerate(record): 94 | tag = var + '/' + '{}'.format(idx) 95 | self.single_log(tag, rec, global_step) 96 | else: 97 | self.single_log(tag, record, global_step) 98 | 99 | @master_only 100 | def after_run(self, runner): 101 | self.writer.close() 102 | -------------------------------------------------------------------------------- /dmb/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | from .flow import SaveResultTool as FlowSaveResultTool 2 | from .stereo import SaveResultTool as DispSaveResultTool 3 | 4 | def SaveResultTool(task): 5 | if task == 'stereo': 6 | return DispSaveResultTool() 7 | elif task == 'flow': 8 | return FlowSaveResultTool() 9 | else: 10 | raise NotImplementedError 11 | 12 | 13 | from .flow import ShowResultTool as FlowShowResultTool 14 | from .stereo import ShowResultTool as DispShowResultTool 15 | 16 | def ShowResultTool(task): 17 | if task == 'stereo': 18 | return DispShowResultTool() 19 | elif task == 'flow': 20 | return FlowShowResultTool() 21 | else: 22 | raise NotImplementedError 23 | -------------------------------------------------------------------------------- /dmb/visualization/flow/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .vis import flow_to_color, flow_err_to_color, flow_max_rad, tensor_to_color, chw_to_hwc, group_color 2 | from .show_result import ShowResultTool, ShowFlow 3 | from .save_result import SaveResultTool 4 | from .vis_hooks import DistFlowVisHook -------------------------------------------------------------------------------- /dmb/visualization/flow/save_result.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import skimage 3 | import skimage.io 4 | import skimage.transform 5 | import os 6 | import os.path as osp 7 | 8 | from mmcv import mkdir_or_exist 9 | from dmb.data.datasets.utils.load_flow import write_flo 10 | from dmb.visualization.flow.show_result import ShowResultTool 11 | 12 | class SaveResultTool(object): 13 | def __call__(self, result, out_dir, image_name): 14 | result_tool = ShowResultTool() 15 | result = result_tool(result) 16 | if 'GrayDisparity' in result.keys(): 17 | grayEstDisp = result['GrayDisparity'] 18 | gray_save_path = osp.join(out_dir, 'flow_0') 19 | mkdir_or_exist(gray_save_path) 20 | skimage.io.imsave(osp.join(gray_save_path, image_name), (grayEstDisp * 256).astype('uint16')) 21 | 22 | if 'ColorDisparity' in result.keys(): 23 | colorEstDisp = result['ColorDisparity'] 24 | color_save_path = osp.join(out_dir, 'color_disp') 25 | mkdir_or_exist(color_save_path) 26 | plt.imsave(osp.join(color_save_path, image_name), colorEstDisp, cmap=plt.cm.hot) 27 | 28 | if 'GroupColor' in result.keys(): 29 | group_save_path = os.path.join(out_dir, 'group_flow') 30 | mkdir_or_exist(group_save_path) 31 | plt.imsave(osp.join(group_save_path, image_name), result['GroupColor'], cmap=plt.cm.hot) 32 | 33 | -------------------------------------------------------------------------------- /dmb/visualization/stereo/__init__.py: -------------------------------------------------------------------------------- 1 | from .vis import disp_to_color, tensor_to_color, disp_err_to_color, group_color 2 | from .show_result import ShowDisp, ShowConf, ShowResultTool 3 | from .save_result import SaveResultTool 4 | from .vis_hooks import DistStereoVisHook 5 | from .sparsification_plot import sparsification_plot 6 | -------------------------------------------------------------------------------- /dmb/visualization/stereo/save_result.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import skimage 3 | import skimage.io 4 | import skimage.transform 5 | import os 6 | import os.path as osp 7 | 8 | from mmcv import mkdir_or_exist 9 | from dmb.visualization.stereo.show_result import ShowResultTool 10 | 11 | 12 | class SaveResultTool(object): 13 | def __call__(self, result, out_dir, image_name): 14 | result_tool = ShowResultTool() 15 | result = result_tool(result, color_map='gray', bins=100) 16 | 17 | if 'GrayDisparity' in result.keys(): 18 | grayEstDisp = result['GrayDisparity'] 19 | gray_save_path = osp.join(out_dir, 'disp_0') 20 | mkdir_or_exist(gray_save_path) 21 | skimage.io.imsave(osp.join(gray_save_path, image_name), (grayEstDisp * 256).astype('uint16')) 22 | 23 | if 'ColorDisparity' in result.keys(): 24 | colorEstDisp = result['ColorDisparity'] 25 | color_save_path = osp.join(out_dir, 'color_disp') 26 | mkdir_or_exist(color_save_path) 27 | plt.imsave(osp.join(color_save_path, image_name), colorEstDisp, cmap=plt.cm.hot) 28 | 29 | if 'GroupColor' in result.keys(): 30 | 
group_save_path = os.path.join(out_dir, 'group_disp') 31 | mkdir_or_exist(group_save_path) 32 | plt.imsave(osp.join(group_save_path, image_name), result['GroupColor'], cmap=plt.cm.hot) 33 | 34 | if 'ColorConfidence' in result.keys(): 35 | conf_save_path = os.path.join(out_dir, 'confidence') 36 | mkdir_or_exist(conf_save_path) 37 | plt.imsave(osp.join(conf_save_path, image_name), result['ColorConfidence']) 38 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.20.0 2 | pandas 3 | matplotlib 4 | scikit-image 5 | scipy 6 | cffi 7 | imageio 8 | tqdm 9 | tensorflow 10 | tensorboardX 11 | yacs 12 | pyyaml 13 | easydict 14 | cython 15 | mmcv==1.3.5 16 | ipython 17 | thop 18 | torch>=1.7.1 19 | torchvision>=0.8.2 20 | torchaudio>=0.7.2 21 | pypng 22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages 2 | from setuptools import setup 3 | 4 | setup( 5 | name="dmb", 6 | version="1.0", 7 | author="Youmi, Minwell", 8 | description="dense matching benchmark in pytorch", 9 | packages=find_packages(exclude=("tests",)), 10 | ) 11 | 12 | -------------------------------------------------------------------------------- /tests/data/datasets/flow/test_flying_chairs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import unittest 4 | 5 | from mmcv import Config 6 | 7 | from dmb.data.datasets.flow.builder import build_flow_dataset as build_dataset 8 | 9 | 10 | class TestFlyingChairsDataset(unittest.TestCase): 11 | 12 | def setUp(self): 13 | config = dict( 14 | data=dict( 15 | train=dict( 16 | type='FlyingChairs', 17 | data_root='/home/youmin/data/OpticalFlow/FlyingChairs/', 18 | annfile='/home/youmin/data/annotations/FlyingChairs/test.json', 19 | input_shape=[256, 448], 20 | mean=[0.485, 0.456, 0.406], 21 | std=[0.229, 0.224, 0.225], 22 | ) 23 | ) 24 | ) 25 | cfg = Config(config) 26 | self.dataset = build_dataset(cfg, 'train') 27 | 28 | def test_anno_loader(self): 29 | print(self.dataset) 30 | print(self.dataset.data_list[111]) 31 | 32 | def test_get_item(self): 33 | for i in range(10): 34 | sample = self.dataset[i] 35 | assert isinstance(sample, dict) 36 | for k, v in sample.items(): 37 | if isinstance(v, torch.Tensor): 38 | print(k, ': with shape', v.shape) 39 | if isinstance(v, (tuple, list)): 40 | print(k, ': ', v) 41 | if v is None: 42 | print(k, ' is None') 43 | 44 | # @unittest.skip('just skip') 45 | def test_all_data(self): 46 | from tqdm import tqdm 47 | for idx in tqdm(range(len(self.dataset))): 48 | try: 49 | item = self.dataset[idx] 50 | except ValueError: 51 | print('Cannot find: {} -> {}'.format(idx, self.dataset.data_list[idx])) 52 | 53 | 54 | if __name__ == '__main__': 55 | unittest.main() 56 | -------------------------------------------------------------------------------- /tests/data/datasets/flow/yaml_to_json.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import os.path as osp 3 | import json 4 | from tqdm import tqdm 5 | 6 | if __name__ == '__main__': 7 | type = 'FlyingChairs' 8 | root = '/home/youmin/data/OpticalFlow/{}/'.format(type) 9 | annFile = '/home/youmin/data/annotations/{}/flyingchairs_val.yml'.format(type) 10 | saveFile = 
'/home/youmin/data/annotations/{}/eval.json'.format(type) 11 | data_list = [] 12 | with open(file=annFile, mode='r') as fp: 13 | data_list.extend(yaml.load(fp, Loader=yaml.BaseLoader)) 14 | 15 | Metas = [] 16 | for idx in range(len(data_list)): 17 | item = data_list[idx] 18 | meta = dict( 19 | left_image_path=item[0], 20 | right_image_path=item[1], 21 | ) 22 | if len(item) > 2: 23 | meta.update(flow_path=item[2]) 24 | Metas.append(meta) 25 | 26 | for meta in tqdm(Metas): 27 | for k, v in meta.items(): 28 | assert osp.exists(osp.join(root, v)), 'Metas: {} does not exist'.format(v) 29 | 30 | info_str = '{} Dataset contains:\n' \ 31 | ' {:5d} samples'.format(type, len(Metas)) 32 | print(info_str) 33 | 34 | print('Save to {}'.format(saveFile)) 35 | with open(file=saveFile, mode='w') as fp: 36 | json.dump(Metas, fp=fp) 37 | -------------------------------------------------------------------------------- /tests/data/datasets/stereo/test_kitti.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import unittest 4 | 5 | from mmcv import Config 6 | 7 | from dmb.data.datasets.stereo import build_stereo_dataset as build_dataset 8 | 9 | 10 | class TestKITTIDataset(unittest.TestCase): 11 | 12 | def setUp(self): 13 | config = dict( 14 | data=dict( 15 | test=dict( 16 | type='KITTI-2015', 17 | data_root='datasets/KITTI-2015/', 18 | annfile='datasets/KITTI-2015/annotations/full_eval.json', 19 | input_shape=[384, 1248], 20 | mean=[0.485, 0.456, 0.406], 21 | std=[0.229, 0.224, 0.225], 22 | toRAM=False, 23 | ) 24 | ) 25 | ) 26 | cfg = Config(config) 27 | self.dataset = build_dataset(cfg, 'test') 28 | 29 | 30 | 31 | 32 | def test_anno_loader(self): 33 | print(self.dataset) 34 | print('toRAM: ', self.dataset.toRAM) 35 | print(self.dataset.data_list[31]) 36 | 37 | def test_get_item(self): 38 | for i in range(10): 39 | sample = self.dataset[i] 40 | assert isinstance(sample, dict) 41 | print("*" * 20) 42 | print("Before scatter") 43 | print("*" * 20) 44 | for k, v in sample.items(): 45 | if isinstance(v, torch.Tensor): 46 | print(k, ': with shape', v.shape) 47 | if isinstance(v, (tuple, list)): 48 | print(k, ': ', v) 49 | if v is None: 50 | print(k, ' is None') 51 | 52 | 53 | if __name__ == '__main__': 54 | unittest.main() 55 | -------------------------------------------------------------------------------- /tests/data/datasets/stereo/test_scene_flow.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import unittest 4 | 5 | from mmcv import Config 6 | 7 | from dmb.data.datasets.stereo.builder import build_stereo_dataset as build_dataset 8 | 9 | 10 | class TestSceneFlowDataset(unittest.TestCase): 11 | 12 | def setUp(self): 13 | config = dict( 14 | data=dict( 15 | train=dict( 16 | type='SceneFlow', 17 | data_root='/home/youmin/data/StereoMatching/SceneFlow/', 18 | annfile='/home/youmin/data/annotations/SceneFlow/cleanpass_train.json', 19 | input_shape=[256, 512], 20 | mean=[0.485, 0.456, 0.406], 21 | std=[0.229, 0.224, 0.225], 22 | ) 23 | ) 24 | ) 25 | cfg = Config(config) 26 | self.dataset = build_dataset(cfg, 'train') 27 | 28 | def test_anno_loader(self): 29 | print(self.dataset) 30 | print(self.dataset.data_list[111]) 31 | 32 | def test_get_item(self): 33 | for i in range(10): 34 | sample = self.dataset[i] 35 | assert isinstance(sample, dict) 36 | for k, v in sample.items(): 37 | if 
isinstance(v, torch.Tensor): 38 | print(k, ': with shape', v.shape) 39 | if isinstance(v, (tuple, list)): 40 | print(k, ': ', v) 41 | if v is None: 42 | print(k, ' is None') 43 | 44 | def test_all_data(self): 45 | from tqdm import tqdm 46 | for idx in tqdm(range(len(self.dataset))): 47 | try: 48 | item = self.dataset[idx] 49 | except ValueError: 50 | print(idx, self.dataset.data_list[idx]) 51 | 52 | 53 | if __name__ == '__main__': 54 | unittest.main() 55 | -------------------------------------------------------------------------------- /tests/modeling/stereo/backbones/test_backbones.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import torch 4 | import torch.nn as nn 5 | from thop import profile 6 | from collections.abc import Iterable 7 | import time 8 | import unittest 9 | 10 | from dmb.modeling.stereo.backbones import build_backbone 11 | from mmcv import Config 12 | 13 | 14 | 15 | class testBackbones(unittest.TestCase): 16 | 17 | @classmethod 18 | def setUpClass(cls): 19 | cls.device = torch.device('cuda:1') 20 | config_path = '/home/zhixiang/youmin/projects/depth/public/' \ 21 | 'DenseMatchingBenchmark/configs/AcfNet/scene_flow_uniform.py' 22 | cls.cfg = Config.fromfile(config_path) 23 | cls.backbone = build_backbone(cls.cfg) 24 | cls.backbone.to(cls.device) 25 | 26 | cls.setUpTimeTestingClass() 27 | cls.avg_time = {} 28 | 29 | @classmethod 30 | def setUpTimeTestingClass(cls): 31 | cls.iters = 50 32 | 33 | h, w = 384, 1248 34 | leftImage = torch.rand(1, 3, h, w).to(cls.device) 35 | rightImage = torch.rand(1, 3, h, w).to(cls.device) 36 | 37 | cls.backbone_input = [leftImage, rightImage] 38 | 39 | print('Input preparation successful!') 40 | 41 | def timeTemplate(self, module, module_name, *args, **kwargs): 42 | with torch.cuda.device(self.device): 43 | torch.cuda.empty_cache() 44 | if isinstance(module, nn.Module): 45 | module.eval() 46 | torch.cuda.synchronize(self.device) 47 | start_time = time.time() 48 | 49 | for i in range(self.iters): 50 | with torch.no_grad(): 51 | if len(args) > 0: 52 | module(*args) 53 | if len(kwargs) > 0: 54 | module(**kwargs) 55 | torch.cuda.synchronize(self.device) 56 | end_time = time.time() 57 | avg_time = (end_time - start_time) / self.iters 58 | print('{} inference forward once takes {:.4f}ms, i.e. 
{:.2f}fps'.format(module_name, avg_time*1000, (1 / avg_time))) 59 | 60 | if isinstance(module, nn.Module): 61 | module.train() 62 | 63 | self.avg_time[module_name] = avg_time 64 | 65 | # @unittest.skip("demonstrating skipping") 66 | def test_0_OutputModel(self): 67 | print(self.backbone) 68 | 69 | # @unittest.skip("demonstrating skipping") 70 | def test_1_ModelTime(self): 71 | self.timeTemplate(self.backbone, 'Model', *self.backbone_input) 72 | 73 | 74 | if __name__ == '__main__': 75 | unittest.main() 76 | 77 | 78 | -------------------------------------------------------------------------------- /tests/modeling/stereo/losses/utils/test_disp2prob.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from mmcv import Config 4 | import time 5 | import unittest 6 | 7 | from dmb.modeling.stereo.losses.utils.disp2prob import LaplaceDisp2Prob, GaussianDisp2Prob, OneHotDisp2Prob 8 | 9 | 10 | class testLosses(unittest.TestCase): 11 | 12 | def setUp(self): 13 | self.device = torch.device("cuda:1") 14 | 15 | def testCase1Laplace(self): 16 | max_disp = 5 17 | start_disp = -2 18 | dilation = 2 19 | disp_sample=None 20 | variance = 2 21 | h, w = 3, 4 22 | 23 | gtDisp = torch.rand(1, 1, h, w) * max_disp + start_disp 24 | 25 | gtDisp = gtDisp.to(self.device) 26 | gtDisp.requires_grad = True 27 | print('*' * 60) 28 | print('Ground Truth Disparity:') 29 | print(gtDisp) 30 | 31 | 32 | print('*' * 60) 33 | print('Generated disparity probability volume:') 34 | prob_volume = LaplaceDisp2Prob( 35 | gtDisp, max_disp=max_disp, variance=variance, 36 | start_disp=start_disp, dilation=dilation, disp_sample=disp_sample 37 | ).getProb() 38 | 39 | idx = 0 40 | for i in range(start_disp, max_disp + start_disp, dilation): 41 | print('Disparity {}:\n {}'.format(i, prob_volume[:, idx, ])) 42 | idx += 1 43 | 44 | def testCase2Laplace(self): 45 | max_disp = 5 46 | start_disp = -2 47 | variance = 2 48 | h, w = 3, 4 49 | disp_sample = torch.Tensor([-2, 0, 2]).repeat(1, h, w, 1).permute(0, 3, 1, 2).contiguous() 50 | 51 | 52 | gtDisp = torch.rand(1, 1, h, w) * max_disp + start_disp 53 | 54 | gtDisp = gtDisp.to(self.device) 55 | gtDisp.requires_grad = True 56 | print('*' * 60) 57 | print('Ground Truth Disparity:') 58 | print(gtDisp) 59 | 60 | print('*' * 60) 61 | print('Generated disparity probability volume:') 62 | prob_volume = LaplaceDisp2Prob( 63 | gtDisp, max_disp=max_disp, start_disp=start_disp, variance=variance, disp_sample=disp_sample 64 | ).getProb() 65 | 66 | idx = 0 67 | for i in range(disp_sample.shape[1]): 68 | print('Disparity {}:\n {}'.format(i, prob_volume[:, idx, ])) 69 | idx += 1 70 | 71 | if __name__ == '__main__': 72 | print('test probability volume!') 73 | unittest.main() 74 | 75 | -------------------------------------------------------------------------------- /tools/demo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python demo.py \ 3 | --config-path ../configs/AcfNet/scene_flow_adaptive.py \ 4 | --checkpoint-path /data/exps/AcfNet/scene_flow_adaptive/epoch_20.pth \ 5 | --data-root ./demo_data/ \ 6 | --device cuda:0 \ 7 | --log-dir /data/exps/AcfNet/scene_flow_adaptive/output/ \ 8 | --pad-to-shape 544 960 \ 9 | --scale-factor 1.0 \ 10 | --disp-div-factor 1.0 \ 11 | -------------------------------------------------------------------------------- /tools/demo_data/disparity/left/0.pfm: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/tools/demo_data/disparity/left/0.pfm -------------------------------------------------------------------------------- /tools/demo_data/disparity/left/4.pfm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/tools/demo_data/disparity/left/4.pfm -------------------------------------------------------------------------------- /tools/demo_data/disparity/right/0.pfm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/tools/demo_data/disparity/right/0.pfm -------------------------------------------------------------------------------- /tools/demo_data/disparity/right/4.pfm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/tools/demo_data/disparity/right/4.pfm -------------------------------------------------------------------------------- /tools/demo_data/images/left/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/tools/demo_data/images/left/0.png -------------------------------------------------------------------------------- /tools/demo_data/images/left/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/tools/demo_data/images/left/4.png -------------------------------------------------------------------------------- /tools/demo_data/images/right/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/tools/demo_data/images/right/0.png -------------------------------------------------------------------------------- /tools/demo_data/images/right/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepMotionAIResearch/DenseMatchingBenchmark/010aeb66e3ceaf3d866036b0ca751861df39432d/tools/demo_data/images/right/4.png -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | NGPUS=$1 5 | CFG_PATH=$2 6 | PORT=$3 7 | SHOW=$4 8 | 9 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch --master_port $PORT --nproc_per_node=$NGPUS \ 10 | test.py $CFG_PATH --launcher pytorch --validate --gpus $NGPUS --show $SHOW 11 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | NGPUS=$1 5 | CFG_PATH=$2 6 | PORT=$3 7 | 8 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch --master_port $PORT --nproc_per_node=$NGPUS \ 9 | train.py $CFG_PATH --launcher pytorch --validate --gpus $NGPUS 10 
| --------------------------------------------------------------------------------
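A usage note for the launch scripts above; the GPU count, port, and config path below are illustrative, not commands taken from the repository docs:

# dist_train.sh takes <NGPUS> <CFG_PATH> <PORT>; dist_test.sh additionally takes
# <SHOW>, the directory handed to test.py via --show. For example:
#   bash dist_train.sh 4 ../configs/PSMNet/scene_flow.py 10010
#   bash dist_test.sh 4 ../configs/PSMNet/scene_flow.py 10010 /path/to/show_dir
# Note that both scripts pin CUDA_VISIBLE_DEVICES=0,1,2,3, so edit that line
# before running on more, or different, GPUs.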